Clean the environment.
Set locations, and the working directory.
A package-installation function.
Load those packages.
install.packages.auto("readr")
install.packages.auto("optparse")
install.packages.auto("tools")
install.packages.auto("dplyr")
install.packages.auto("tidyr")
install.packages.auto("naniar")
# To get 'data.table' with 'fwrite' to be able to directly write gzipped-files
# Ref: https://stackoverflow.com/questions/42788401/is-possible-to-use-fwrite-from-data-table-with-gzfile
# install.packages("data.table", repos = "https://Rdatatable.gitlab.io/data.table")
library(data.table)
install.packages.auto("tidyverse")
install.packages.auto("knitr")
install.packages.auto("DT")
install.packages.auto("MASS")
# install.packages.auto("Seurat") # latest version
# Install the devtools package from Hadley Wickham
install.packages.auto('devtools')
install.packages.auto("haven")
install.packages.auto("sjlabelled")
install.packages.auto("sjPlot")
install.packages.auto("labelled")
install.packages.auto("tableone")
install.packages.auto("ggpubr")We will create a datestamp and define the Utrecht Science Park Colour Scheme.
# Function to grep data from glm()/lm()
#' Summarise the association of one predictor with a continuous trait.
#'
#' Reads the second row of `summary(fit)$coefficients` (the first term after
#' the intercept), the model R^2 values and the sample sizes, prints a
#' progress report, and returns everything as one vector.
#'
#' Reads the global `AEDB.CEA.samplesize` as the reference sample size.
#'
#' @param fit A fitted model whose `summary()` has a `coefficients` matrix.
#'   `r.squared` and `adj.r.squared` are reported when `summary()` provides
#'   them (as for `lm()`); otherwise they are returned as NA.
#' @param DATASET Label of the dataset (used in messages and in the result).
#' @param x_name Label of the predictor (score/exposure/biomarker).
#' @param y Label of the trait/outcome.
#' @param verbose Print the collected statistics? Only the first element is
#'   used, so the historical default `c(TRUE, FALSE)` means `TRUE`.
#' @return A vector of length 15, in this order: dataset, predictor, outcome,
#'   effect size, standard error, exp(effect size), lower and upper 95% CI of
#'   exp(effect size), t-value, p-value, R^2, adjusted R^2, reference sample
#'   size, model sample size, percentage missing. When only one coefficient
#'   was estimated, the 12 statistics are NA.
GLM.CON <- function(fit, DATASET, x_name, y, verbose=c(TRUE,FALSE)){
  # `if ()` needs a single TRUE/FALSE; a length-2 condition is an error in
  # R >= 4.2, so reduce the default vector to its first element.
  verbose <- isTRUE(as.logical(verbose[1]))

  cat("Analyzing in dataset '", DATASET ,"' the association of '", x_name ,"' with '", y ,"' .\n")

  # Summarise once instead of once per extracted statistic.
  fit_summary <- summary(fit)
  coefs <- fit_summary$coefficients

  if (nrow(coefs) == 1) {
    # Only the intercept was estimated. Pad to the same 15 fields as a
    # fitted model so results can be stacked row by row.
    output <- c(DATASET, x_name, y, rep(NA, 12))
    cat("Model not fitted; probably singular.\n")
  } else {
    cat("Collecting data.\n\n")
    effectsize <- coefs[2, 1]
    SE <- coefs[2, 2]
    OReffect <- exp(effectsize)
    CI_low <- exp(effectsize - 1.96 * SE)
    CI_up <- exp(effectsize + 1.96 * SE)
    tvalue <- coefs[2, 3]
    pvalue <- coefs[2, 4]
    R <- fit_summary$r.squared
    R.adj <- fit_summary$adj.r.squared
    # Summaries without these elements return NULL, which c() would drop
    # and thereby shorten the result; keep the slots as NA instead.
    if (is.null(R)) R <- NA_real_
    if (is.null(R.adj)) R.adj <- NA_real_
    sample_size <- nrow(model.frame(fit))
    AE_N <- AEDB.CEA.samplesize
    Perc_Miss <- 100 - ((sample_size * 100) / AE_N)

    output <- c(DATASET, x_name, y, effectsize, SE, OReffect, CI_low, CI_up,
                tvalue, pvalue, R, R.adj, AE_N, sample_size, Perc_Miss)

    if (verbose) {
      cat("We have collected the following and summarize it in an object:\n")
      cat("Dataset...................:", DATASET, "\n")
      cat("Score/Exposure/biomarker..:", x_name, "\n")
      cat("Trait/outcome.............:", y, "\n")
      cat("Effect size...............:", round(effectsize, 6), "\n")
      cat("Standard error............:", round(SE, 6), "\n")
      cat("Odds ratio (effect size)..:", round(OReffect, 3), "\n")
      cat("Lower 95% CI..............:", round(CI_low, 3), "\n")
      cat("Upper 95% CI..............:", round(CI_up, 3), "\n")
      cat("T-value...................:", round(tvalue, 6), "\n")
      cat("P-value...................:", signif(pvalue, 8), "\n")
      cat("R^2.......................:", round(R, 6), "\n")
      cat("Adjusted r^2..............:", round(R.adj, 6), "\n")
      cat("Sample size of AE DB......:", AE_N, "\n")
      cat("Sample size of model......:", sample_size, "\n")
      cat("Missing data %............:", round(Perc_Miss, 6), "\n")
    } else {
      cat("Collecting data in summary object.\n")
    }
  }
  output
}
#' Summarise the association of one predictor with a binary trait.
#'
#' Reads the second row of `summary(fit)$coefficients` (the first term after
#' the intercept), derives three pseudo-R^2 values from the model deviances,
#' prints a progress report, and returns everything as one vector.
#'
#' Reads the global `AEDB.CEA.samplesize` as the reference sample size.
#'
#' @param fit A fitted model providing `deviance`, `null.deviance` and
#'   `fitted.values`, and whose `summary()` has a `coefficients` matrix
#'   (as for `glm()`).
#' @param DATASET Label of the dataset (used in messages and in the result).
#' @param x_name Label of the predictor (score/exposure/biomarker).
#' @param y Label of the trait/outcome.
#' @param verbose Print the collected statistics? Only the first element is
#'   used, so the historical default `c(TRUE, FALSE)` means `TRUE`.
#' @return A vector of length 16, in this order: dataset, predictor, outcome,
#'   effect size, standard error, odds ratio, lower and upper 95% CI of the
#'   odds ratio, z-value, p-value, Hosmer-Lemeshow R^2, Cox-Snell R^2,
#'   Nagelkerke R^2, reference sample size, model sample size, percentage
#'   missing. When only one coefficient was estimated, the 13 statistics
#'   are NA.
GLM.BIN <- function(fit, DATASET, x_name, y, verbose=c(TRUE,FALSE)){
  # `if ()` needs a single TRUE/FALSE; a length-2 condition is an error in
  # R >= 4.2, so reduce the default vector to its first element.
  verbose <- isTRUE(as.logical(verbose[1]))

  cat("Analyzing in dataset '", DATASET ,"' the association of '", x_name ,"' with '", y ,"' ...\n")

  # Summarise once instead of once per extracted statistic.
  coefs <- summary(fit)$coefficients

  if (nrow(coefs) == 1) {
    # Only the intercept was estimated. Pad to the same 16 fields as a
    # fitted model so results can be stacked row by row.
    output <- c(DATASET, x_name, y, rep(NA, 13))
    cat("Model not fitted; probably singular.\n")
  } else {
    cat("Collecting data...\n")
    effectsize <- coefs[2, 1]
    SE <- coefs[2, 2]
    OReffect <- exp(effectsize)
    CI_low <- exp(effectsize - 1.96 * SE)
    CI_up <- exp(effectsize + 1.96 * SE)
    zvalue <- coefs[2, 3]
    pvalue <- coefs[2, 4]

    # Pseudo-R^2 measures derived from the residual and null deviance.
    dev <- fit$deviance
    nullDev <- fit$null.deviance
    modelN <- length(fit$fitted.values)
    R.l <- 1 - dev / nullDev
    R.cs <- 1 - exp(-(nullDev - dev) / modelN)
    R.n <- R.cs / (1 - (exp(-nullDev / modelN)))

    sample_size <- nrow(model.frame(fit))
    AE_N <- AEDB.CEA.samplesize
    Perc_Miss <- 100 - ((sample_size * 100) / AE_N)

    output <- c(DATASET, x_name, y, effectsize, SE, OReffect, CI_low, CI_up,
                zvalue, pvalue, R.l, R.cs, R.n, AE_N, sample_size, Perc_Miss)

    if (verbose) {
      cat("We have collected the following and summarize it in an object:\n")
      cat("Dataset...................:", DATASET, "\n")
      cat("Score/Exposure/biomarker..:", x_name, "\n")
      cat("Trait/outcome.............:", y, "\n")
      cat("Effect size...............:", round(effectsize, 6), "\n")
      cat("Standard error............:", round(SE, 6), "\n")
      cat("Odds ratio (effect size)..:", round(OReffect, 3), "\n")
      cat("Lower 95% CI..............:", round(CI_low, 3), "\n")
      cat("Upper 95% CI..............:", round(CI_up, 3), "\n")
      cat("Z-value...................:", round(zvalue, 6), "\n")
      cat("P-value...................:", signif(pvalue, 8), "\n")
      cat("Hosmer and Lemeshow r^2...:", round(R.l, 6), "\n")
      cat("Cox and Snell r^2.........:", round(R.cs, 6), "\n")
      cat("Nagelkerke's pseudo r^2...:", round(R.n, 6), "\n")
      cat("Sample size of AE DB......:", AE_N, "\n")
      cat("Sample size of model......:", sample_size, "\n")
      cat("Missing data %............:", round(Perc_Miss, 6), "\n")
    } else {
      cat("Collecting data in summary object.\n")
    }
  }
  output
}

Using a Mendelian Randomization approach, we recently examined associations between the circulating levels of 41 cytokines and growth factors and the risk of stroke in the MEGASTROKE GWAS dataset (67,000 stroke cases and 450,000 controls) and found Monocyte chemoattractant protein-1 (MCP-1) as the cytokine showing the strongest association with stroke, particularly large artery and cardioembolic stroke (Georgakis et al., 2019a). Genetically elevated MCP-1 levels were also associated with a higher risk of coronary artery disease and myocardial infarction (Georgakis et al., 2019a). Further, in a meta-analysis of 6 observational population-based of longitudinal cohort studies we recently showed that baseline levels of MCP-1 were associated with a higher risk of ischemic stroke over follow-up (Georgakis et al., 2019b). While these data suggest a central role of MCP-1 in the pathogenesis of atherosclerosis, it remains unknown if MCP-1 levels in the blood really reflect MCP-1 activity. MCP-1 is expressed in the atherosclerotic plaque and attracts monocytes in the subendothelial space (Nelken et al., 1991; Papadopoulou et al., 2008; Takeya et al., 1993; Wilcox et al., 1994). Thus, MCP-1 levels in the plaque might more strongly reflect MCP-1 signaling. However, it remains unknown if MCP-1 plaque levels associate with plaque vulnerability or risk of cardiovascular events.
Against this background, we now aim to make use of the data from the Athero-Express Biobank Study to explore the associations of MCP-1 protein levels in the atherosclerotic plaques from patients undergoing carotid endarterectomy with phenotypes of plaque vulnerability and secondary vascular events over a follow-up of three years.
We used the Luminex-platform to measure atherosclerotic plaque proteins. Historically, this was done in two experiments:
Experiment 1:
This entails an experiment in which 20+ other interleukins, cytokines, chemokines, and metalloproteinases were also measured. Some of these were measured using Luminex; others were measured using FACS, ELISA, and activity assays. These assays were run according to the manufacturer's instructions in a research setting.
- variable MCP1: Monocyte chemotactic protein 1 (a.k.a. CCL2; Entrez Gene: 6347) concentration in plaque [pg/mL]. Luminex platform.
Experiment 2:
This entails an experiment in which MCP1 was measured in a clinical diagnostic setting on a clinically validated Luminex platform.
- variable MCP1_pg_ml_2015: Monocyte chemotactic protein 1 (a.k.a. CCL2; Entrez Gene: 6347) concentration in plaque [pg/mL]. Luminex platform.
Loading Athero-Express clinical data.
require(haven)
# AEDB <- haven::read_sav(paste0(AEDB_loc, "/2019-3NEW_AtheroExpressDatabase_ScientificAE_02072019_IC_added.sav"))
AEDBraw <- haven::read_sav(paste0(AEDB_loc, "/2020_1_NEW_AtheroExpressDatabase_ScientificAE_16-03-2020.sav"))
head(AEDBraw)Loading Athero-Express plaque protein measurements from 2015.
library(openxlsx)
AEDB_Protein_2015 <- openxlsx::read.xlsx(paste0(AEDB_loc, "/_AE_Proteins/Cytokines_and_chemokines_2015/20200629_MPCF015-0024.xlsx"), sheet = "for_SPSS_R")
names(AEDB_Protein_2015)[names(AEDB_Protein_2015) == "SampleID"] <- "STUDY_NUMBER"
head(AEDB_Protein_2015)NALoading Athero-Express plasma protein measurements from 2019/2020 as measured using OLINK.
# Read the OLINK plasma protein data: one sheet per panel (CVD2, CVD3, CM)
# plus a protein annotation sheet, all from the same workbook under AEDB_loc.
library(openxlsx)
AEDB_PlasmaProtein_OLINK_CVD2raw <- openxlsx::read.xlsx(paste0(AEDB_loc, "/_AE_OLINK/OLINK_ERA_UMC_AE_StroesRentate/20200706_AtheroExpress_OlinkData_forR.xlsx"), sheet = "CVD2_forR")
AEDB_PlasmaProtein_OLINK_CVD3raw <- openxlsx::read.xlsx(paste0(AEDB_loc, "/_AE_OLINK/OLINK_ERA_UMC_AE_StroesRentate/20200706_AtheroExpress_OlinkData_forR.xlsx"), sheet = "CVD3_forR")
AEDB_PlasmaProtein_OLINK_CMraw <- openxlsx::read.xlsx(paste0(AEDB_loc, "/_AE_OLINK/OLINK_ERA_UMC_AE_StroesRentate/20200706_AtheroExpress_OlinkData_forR.xlsx"), sheet = "CM_forR")
AEDB_PlasmaProtein_OLINK_ProteinInfo <- openxlsx::read.xlsx(paste0(AEDB_loc, "/_AE_OLINK/OLINK_ERA_UMC_AE_StroesRentate/20200706_AtheroExpress_OlinkData_forR.xlsx"), sheet = "ProteinInfo")
# Per panel, keep only the samples whose panel-specific QC flag is "Pass".
# The unfiltered *raw tables stay available for comparison.
AEDB_PlasmaProtein_OLINK_CVD2 <- AEDB_PlasmaProtein_OLINK_CVD2raw %>% filter(QC_Warning_CVD2 == "Pass")
AEDB_PlasmaProtein_OLINK_CVD3 <- AEDB_PlasmaProtein_OLINK_CVD3raw %>% filter(QC_Warning_CVD3 == "Pass")
AEDB_PlasmaProtein_OLINK_CM <- AEDB_PlasmaProtein_OLINK_CMraw %>% filter(QC_Warning_CM == "Pass")
table(AEDB_PlasmaProtein_OLINK_CVD2raw$QC_Warning_CVD2)
Pass Warning
690 10
table(AEDB_PlasmaProtein_OLINK_CVD2$QC_Warning_CVD2)
Pass
690
table(AEDB_PlasmaProtein_OLINK_CVD3raw$QC_Warning_CVD3)
Pass
699
table(AEDB_PlasmaProtein_OLINK_CVD3$QC_Warning_CVD3)
Pass
699
table(AEDB_PlasmaProtein_OLINK_CMraw$QC_Warning_CM)
Pass Warning
691 9
table(AEDB_PlasmaProtein_OLINK_CM$QC_Warning_CM)
Pass
691
# Drop bookkeeping columns before merging the panels: `Order` from all three
# panels, and `Plate_ID` from CVD2 and CVD3 so that the merged table carries
# only the `Plate_ID` of the CM panel.
AEDB_PlasmaProtein_OLINK_CVD2 <- AEDB_PlasmaProtein_OLINK_CVD2[
  , setdiff(names(AEDB_PlasmaProtein_OLINK_CVD2), c("Plate_ID", "Order")), drop = FALSE]
AEDB_PlasmaProtein_OLINK_CVD3 <- AEDB_PlasmaProtein_OLINK_CVD3[
  , setdiff(names(AEDB_PlasmaProtein_OLINK_CVD3), c("Plate_ID", "Order")), drop = FALSE]
AEDB_PlasmaProtein_OLINK_CM[["Order"]] <- NULL

# Left-join CVD3 and then CM onto CVD2 by study number, keeping the CVD2 rows
# and their order.
temp <- merge(x = AEDB_PlasmaProtein_OLINK_CVD2, y = AEDB_PlasmaProtein_OLINK_CVD3,
              by = "STUDY_NUMBER", all.x = TRUE, sort = FALSE)
AEDB_PlasmaProtein_OLINK <- merge(x = temp, y = AEDB_PlasmaProtein_OLINK_CM,
                                  by = "STUDY_NUMBER", all.x = TRUE, sort = FALSE)

# Replace the verbose plate identifiers by short labels. Identifiers that are
# not in the lookup table (and missing values) are left unchanged.
AEDB_PlasmaProtein_OLINK$Plate_ID <- local({
  plate_labels <- c(
    "ERA_UMC_AE_cardiometabolic_plt1_29-10-19" = "plate 1",
    "ERA_UMC_AE_Cardiometabolic_plt2" = "plate 2",
    "ERA_UMC_AE_Cardiometabolic_plt3" = "plate 3",
    "ERA_UMC_AE_Cardiometabolic_plt4" = "plate 4",
    "ERA_UMC_AE_Cardiometabolic_plt5" = "plate 5",
    "ERA_UMC_AE_Cardiometabolic_pl6" = "plate 6",
    "SMART_CM_plt10" = "plate 10",
    "SMART_plt11_CM" = "plate 11"
  )
  plate <- AEDB_PlasmaProtein_OLINK$Plate_ID
  known <- plate %in% names(plate_labels)
  plate[known] <- unname(plate_labels[plate[known]])
  plate
})
olink_proteins <- c("BMP6", "ANGPT1", "ADM", "CD40L", "SLAMF7", "PGF", "ADAMTS13", "BOC", "IL4RA", "SRC", "IL1ra", "IL6", "TNFRSF10A", "STK4", "IDUA",
"TNFRSF11A", "PAR1", "TRAILR2", "PRSS27", "TIE2", "TF", "IL1RL2", "PDGF_subunit_B", "IL27", "IL17D", "CXCL1", "LOX1", "Gal9", "GIF", "SCF",
"IL18", "FGF21", "PIgR", "RAGE", "SOD2", "CTRC", "FGF23", "SPON2", "GH", "FS", "GLO1", "CD84", "PAPPA", "SERPINA12", "REN", "DECR1",
"MERTK", "KIM1", "THBS2", "TM", "VSIG2", "AMBP", "PRELP", "HO1", "XCL1", "IL16", "SORT1", "CEACAM8", "PTX3", "PSGL1", "CCL17", "CCL3",
"MMP7", "IgG_Fc_receptor_IIb", "ITGB1BP2", "DCN", "Dkk1", "LPL", "PRSS8", "AGRP", "HBEGF", "GDF2", "FABP2", "THPO", "MARCO", "GT", "BNP",
"MMP12", "ACE2", "PDL2", "CTSL1", "hOSCAR", "TNFRSF13B", "TGM2", "LEP", "CA5A", "HSP_27", "CD4", "NEMO", "VEGFD", "PARP1", "HAOX1",
"TNFRSF14", "LDL_receptor", "ITGB2", "IL17RA", "TNFR2", "MMP9", "EPHB4", "IL2RA", "OPG", "ALCAM", "TFF3", "SELP", "CSTB", "MCP1", "CD163",
"Gal3", "GRN", "NTproBNP", "BLM_hydrolase", "PLC", "LTBR", "Notch_3", "TIMP4", "CNTN1", "CDH5", "TLT2", "FABP4", "TFPI", "PAI", "CCL24",
"TR", "TNFRSF10C", "GDF15", "SELE", "AZU1", "DLK1", "SPON1", "MPO", "CXCL16", "IL6RA", "RETN", "IGFBP1", "CHIT1", "TRAP", "GP6", "PSPD",
"PI3", "EpCAM", "APN", "AXL", "IL1RT1", "MMP2", "FAS", "MB", "TNFSF13B", "PRTN3", "PCSK9", "UPAR", "OPN", "CTSD", "PGLYRP1", "CPA1", "JAMA",
"Gal4", "IL1RT2", "SHPS1", "CCL15", "CASP3", "uPA", "CPB1", "CHI3L1", "ST2", "tPA", "SCGB3A2", "EGFR", "IGFBP7", "CD93", "IL18BP", "COL1A1",
"PON3", "CTSZ", "MMP3", "RARRES2", "ICAM2", "KLK6", "PDGF_subunit_A", "TNFR1", "IGFBP2", "vWF", "PECAM1", "MEPE", "CCL16", "PRCP", "CA1",
"ICAM1", "CHL1", "TGFBI", "ENG", "PLTP", "SERPINA7", "IGFBP3", "CR2", "SERPINA5", "FCGR3B", "IGFBP6", "CDH1", "CCL5", "CCL14", "GNLY",
"NOTCH1", "PAM", "PROC", "CST3", "NCAM1", "PCOLCE", "LILRB1", "MET", "LTBP2", "IL7R", "VCAM1", "SELL", "F11", "COMP", "CA4", "PTPRS",
"MBL2", "TIMP1", "ANGPTL3", "REG3A", "SOD1", "CD46", "ITGAM", "TNC", "NID1", "CFHR5", "SPARCL1", "PLXNB2", "MEGF9", "ANG", "ST6GAL1",
"DPP4", "REG1A", "QPCT", "FCN2", "FETUB", "CES1", "CRTAC1", "TCN2", "PRSS2", "ICAM3", "SAA4", "CNDP1", "FCGR2A", "NRP1", "EFEMP1", "TIMD4",
"FAP", "TIE1", "THBS4", "F7", "GP1BA", "LYVE1", "CA3", "TGFBR3", "DEFA1", "CD59", "APOM", "OSMR", "LILRB2", "UMOD", "CCL18", "COL18A1",
"LCN2", "KIT", "C1QTNF1", "AOC3", "GAS6", "IGLC2", "PLA2G7", "TNXB", "MFAP5", "VASN", "LILRB5", "C2")
length(olink_proteins)[1] 276
olink_proteins_rank = unlist(lapply(olink_proteins, paste0, "_rankNorm"))
olink_proteins_short <- c("MCP1")
olink_proteins_short_rank <- unlist(lapply(olink_proteins_short, paste0, "_rankNorm"))
rm(temp)We know that the proteins are not normally distributed and therefore we will standardise them as follows:
z = ( x - μ ) / σ
Where for each sample, x equals the value of the variable, μ (mu) equals the mean of x, and σ (sigma) equals the standard deviation of x.
# Standardise each selected protein to a z-score: z = (x - mean) / sd.
#
# For every protein in `olink_proteins_short` the raw column is coerced to
# numeric and a new column named by `olink_proteins_short_rank` is appended
# with the z-score. Any existing column of that name is replaced.
#
# NOTE(review): the `_rankNorm` suffix is kept because later code selects
# these columns by name, but the values are plain z-scores, not a rank-based
# normal transform. A z-score rescales the values and leaves the shape of
# the distribution unchanged.
for (PROTEIN in seq_along(olink_proteins_short)) {
  var.temp.z <- olink_proteins_short_rank[PROTEIN]
  var.temp <- olink_proteins_short[PROTEIN]

  cat(paste0("\nSelecting ", var.temp, " and standardising: ", var.temp.z, ".\n"))
  cat(paste0("* changing ", var.temp, " to numeric.\n"))
  AEDB_PlasmaProtein_OLINK[[var.temp]] <- as.numeric(AEDB_PlasmaProtein_OLINK[[var.temp]])

  # Report the mean of the observed values. The previous code took the mean
  # of `!is.na(x)`, i.e. the fraction of non-missing samples.
  cat(paste0("* standardising ", var.temp,
             " (mean: ", round(mean(AEDB_PlasmaProtein_OLINK[[var.temp]], na.rm = TRUE), digits = 6),
             ", n = ", sum(!is.na(AEDB_PlasmaProtein_OLINK[[var.temp]])), ").\n"))

  cat(paste0("* renaming Z to ", var.temp.z, ".\n"))
  # Remove a stale column first so the fresh z-score is appended as the last
  # column, as before.
  AEDB_PlasmaProtein_OLINK[[var.temp.z]] <- NULL
  AEDB_PlasmaProtein_OLINK[[var.temp.z]] <-
    (AEDB_PlasmaProtein_OLINK[[var.temp]] - mean(AEDB_PlasmaProtein_OLINK[[var.temp]], na.rm = TRUE)) /
    sd(AEDB_PlasmaProtein_OLINK[[var.temp]], na.rm = TRUE)
}
Selecting MCP1 and standardising: MCP1_rankNorm.
* changing MCP1 to numeric.
* standardising MCP1 (mean: 0.997101, n = 688).
* renaming Z to MCP1_rankNorm.
rm(var.temp, var.temp.z)Here we summarize some of these data in the subset of genetic data that passed QC.
# Print the five-number summary (plus mean and NA count) of the raw and the
# standardised values of every selected protein.
# seq_along() skips the loop when no protein is selected; `1:length(x)`
# would iterate over c(1, 0) in that case.
for (PROTEIN in seq_along(olink_proteins_short)) {
  var.temp.z <- olink_proteins_short_rank[PROTEIN]
  var.temp <- olink_proteins_short[PROTEIN]

  cat(paste0("\nSummarising data for ", var.temp, " [AU]; n = ",
             sum(!is.na(AEDB_PlasmaProtein_OLINK[, var.temp])), ".\n"))
  print(summary(AEDB_PlasmaProtein_OLINK[, var.temp]))
  print(summary(AEDB_PlasmaProtein_OLINK[, var.temp.z]))
}
Summarising data for MCP1 [AU]; n = 688.
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.6649 3.3221 3.5619 3.6085 3.8117 12.2641 2
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-5.08789 -0.49497 -0.08056 0.00000 0.35114 14.96088 2
rm(var.temp, var.temp.z, PROTEIN)require("ggpubr")
require("ggsci")
# mypal = pal_npg("nrc", alpha = 0.7)(9)
# mypal
# ## [1] "#E64B35B2" "#4DBBD5B2" "#00A087B2" "#3C5488B2" "#F39B7FB2" "#8491B4B2"
# ## [7] "#91D1C2B2" "#DC0000B2" "#7E6148B2"
# library("scales")
# show_col(mypal)
# QC figure per selected protein: histogram of the raw values, boxplot of the
# standardised values per plate, and histogram of the standardised values.
# The composed figure is printed and saved as PNG under QC_loc.

# Pairwise plate comparisons against plate 1. Only needed by the optional
# stat_compare_means() layers that are commented out below. The list is
# identical for every protein, so it is built once outside the loop.
my_comparisons <- list(c("plate 1", "plate 2"),
                       c("plate 1", "plate 3"),
                       c("plate 1", "plate 4"),
                       c("plate 1", "plate 5"),
                       c("plate 1", "plate 6"),
                       c("plate 1", "plate 10"),
                       c("plate 1", "plate 11"))

# patchwork supplies the `/` and `|` operators that compose the panels.
# library() stops with a clear error when the package is missing, whereas
# require() would only warn and let the composition fail later.
library(patchwork)

for (PROTEIN in seq_along(olink_proteins_short)) {
  var.temp <- olink_proteins_short[PROTEIN]
  var.temp.z <- paste0(var.temp, "_rankNorm")
  cat(paste0("\nProcessing protein [ ", var.temp, " (AU)].\n"))

  dt.temp <- subset(AEDB_PlasmaProtein_OLINK,
                    select = c("STUDY_NUMBER", var.temp, var.temp.z, "Plate_ID"))
  # Address the raw column by name rather than by position.
  dt.temp[[var.temp]] <- as.numeric(dt.temp[[var.temp]])

  # All three panels use the samples with a known plate; filter once.
  dt.plot <- dt.temp %>% filter(!is.na(Plate_ID))

  p1 <- ggpubr::gghistogram(dt.plot,
                            x = var.temp,
                            y = "..count..",
                            color = "#4DBBD5B2", fill = "#4DBBD5B2",
                            rug = TRUE,
                            add = "mean",
                            xlab = paste0(var.temp, " [AU]."),
                            ggtheme = theme_minimal())

  p2 <- ggpubr::ggboxplot(data = dt.plot,
                          x = "Plate_ID",
                          y = var.temp.z,
                          color = "Plate_ID",
                          palette = "npg",
                          add = c("mean", "jitter"),
                          xlab = "plates used",
                          ylab = paste0(var.temp.z, " [AU]."),
                          ggtheme = theme_minimal()) #+
  # stat_compare_means(method = "anova") #+ # Add global p-value
  # stat_compare_means(comparisons = my_comparisons) + # Add pairwise comparisons p-value
  # stat_compare_means(label = "p.signif", method = "t.test", ref.group = "plate 1")

  p3 <- ggpubr::gghistogram(dt.plot,
                            x = var.temp.z,
                            y = "..count..",
                            color = "#91D1C2B2", fill = "#91D1C2B2",
                            rug = TRUE,
                            add = "mean",
                            xlab = paste0(var.temp.z, " [AU]."),
                            ggtheme = theme_minimal())

  # Left column: raw histogram above the per-plate boxplot; right column:
  # histogram of the standardised values.
  p4 <- (ggpar(p1, legend = "") / ggpar(p2 + rotate_x_text(45), legend = "")) |
    ggpar(p3, legend = "right")
  print(p4)

  ggsave(filename = paste0(QC_loc, "/", Today, ".", PROJECTNAME, ".OLINK.", var.temp, ".png"),
         plot = p4, device = "png", width = 20, height = 20)
}
Processing metabolite [ MCP1 (AU)].
Using `bins = 30` by default. Pick better value with the argument `bins`.geom_vline(): Ignoring `mapping` because `xintercept` was provided.geom_vline(): Ignoring `data` because `xintercept` was provided.`fun.y` is deprecated. Use `fun` instead.`fun.ymin` is deprecated. Use `fun.min` instead.`fun.ymax` is deprecated. Use `fun.max` instead.Using `bins = 30` by default. Pick better value with the argument `bins`.geom_vline(): Ignoring `mapping` because `xintercept` was provided.geom_vline(): Ignoring `data` because `xintercept` was provided.
# rm(my_comparisons,
# p1, p2, p3, p4,
# var.temp, var.temp.z, dt.temp, PROTEIN)
We will merge these measurements with the AEDB to compare pg/ug vs. pg/mL measurements of MCP1, also in relation to plaque phenotypes. In addition, we have more information on the experiment and can correct for this.
# Tag the 2015 plaque protein columns with a "_2015" suffix before merging.
names(AEDB_Protein_2015)[names(AEDB_Protein_2015) == "IL6_pg_ml"] <- "IL6_pg_ml_2015"
names(AEDB_Protein_2015)[names(AEDB_Protein_2015) == "IL6R_pg_ml"] <- "IL6R_pg_ml_2015"
names(AEDB_Protein_2015)[names(AEDB_Protein_2015) == "IL8_pg_ml"] <- "IL8_pg_ml_2015"
names(AEDB_Protein_2015)[names(AEDB_Protein_2015) == "MCP1_pg_ml"] <- "MCP1_pg_ml_2015"
names(AEDB_Protein_2015)[names(AEDB_Protein_2015) == "RANTES_pg_ml"] <- "RANTES_pg_ml_2015"
names(AEDB_Protein_2015)[names(AEDB_Protein_2015) == "PAI1_pg_ml"] <- "PAI1_pg_ml_2015"
names(AEDB_Protein_2015)[names(AEDB_Protein_2015) == "MCSF_pg_ml"] <- "MCSF_pg_ml_2015"
names(AEDB_Protein_2015)[names(AEDB_Protein_2015) == "Adiponectin_ng_ml"] <- "Adiponectin_ng_ml_2015"
names(AEDB_Protein_2015)[names(AEDB_Protein_2015) == "Segment_isolated_Tris"] <- "Segment_isolated_Tris_2015"
names(AEDB_Protein_2015)[names(AEDB_Protein_2015) == "Tris_protein_conc_ug_ml"] <- "Tris_protein_conc_ug_ml_2015"
# Keep only the study number and the renamed 2015 columns.
temp <- subset(AEDB_Protein_2015, select = c("STUDY_NUMBER", "IL6_pg_ml_2015", "IL6R_pg_ml_2015", "IL8_pg_ml_2015", "MCP1_pg_ml_2015", "RANTES_pg_ml_2015", "PAI1_pg_ml_2015", "MCSF_pg_ml_2015", "Adiponectin_ng_ml_2015", "Segment_isolated_Tris_2015", "Tris_protein_conc_ug_ml_2015"))
# From the OLINK plasma data keep MCP1 (raw and standardised) and the plate,
# renamed so they are recognisable as plasma OLINK variables after merging.
temp2 <- subset(AEDB_PlasmaProtein_OLINK, select = c("STUDY_NUMBER", "MCP1", "MCP1_rankNorm", "Plate_ID"))
names(temp2)[names(temp2) == "MCP1"] <- "MCP1_plasma_olink"
names(temp2)[names(temp2) == "MCP1_rankNorm"] <- "MCP1_plasma_olink_rankNorm"
names(temp2)[names(temp2) == "Plate_ID"] <- "PlateID_plasma_olink"
# Left-join both tables onto the clinical data by study number; all.x = TRUE
# keeps every row of AEDBraw, also those without protein measurements.
AEDBraw2 <- merge(AEDBraw, temp, by.x = "STUDY_NUMBER", by.y = "STUDY_NUMBER", sort = FALSE,
all.x = TRUE)
AEDB <- merge(AEDBraw2, temp2, by.x = "STUDY_NUMBER", by.y = "STUDY_NUMBER", sort = FALSE,
all.x = TRUE)
# Remove the intermediate tables.
rm(temp, temp2, AEDBraw2)
temp <- subset(AEDB, select = c("STUDY_NUMBER", "MCP1", "MCP1_pg_ug_2015", "MCP1_pg_ml_2015", "Segment_isolated_Tris_2015",
"MCP1_plasma_olink", "MCP1_plasma_olink_rankNorm", "PlateID_plasma_olink"))
dim(temp)[1] 3793 8
head(temp)rm(temp) We can examine the contents of the Athero-Express Biobank dataset to know what each variable is called, what class (type) it has, and what the variable description is.
Note: There is an excellent post on this: https://www.r-bloggers.com/working-with-spss-labels-in-r/.
AEDB %>% sjPlot::view_df(show.type = TRUE,
show.frq = TRUE,
show.prc = TRUE,
show.na = TRUE,
max.len = TRUE,
wrap.labels = 20,
verbose = FALSE,
use.viewer = FALSE,
file = paste0(OUT_loc, "/", Today, ".AEDB.dictionary.html")) yearpsy5 [326], yearchol3 [347], yearablo3 [419]
We need to be very strict in defining symptoms. Therefore we will fix a new variable that groups symptoms at inclusion.
Coding of symptoms is as follows:
We will group as follows in Symptoms.5G:
We will also group as follows in AsymptSympt:
We will also group as follows in AsymptSympt2G:
# Fix symptoms
# Derive three grouped symptom variables from the numeric `sympt` code:
#   Symptoms.5G   - Asymptomatic / TIA / Stroke / Ocular / Retinal infarction / Other
#   AsymptSympt   - Asymptomatic / Symptomatic (TIA, stroke) / Ocular and others
#   AsymptSympt2G - Asymptomatic / Symptomatic (any symptom code)
# Codes that are not listed (including the -999 sentinel) stay NA.
#
# The columns are addressed explicitly instead of through attach(): attach()
# works on a snapshot taken before `sympt` is edited below, and it leaves
# AEDB on the search path when an error occurs before detach().

# NOTE(review): missing `sympt` values are overwritten with the sentinel -999
# in AEDB itself. This is kept unchanged because code outside this section
# may rely on it - confirm whether NA should be preserved instead.
AEDB$sympt[is.na(AEDB$sympt)] <- -999

# Plain codes without labels/attributes, used for set membership below.
sympt_code <- as.vector(unclass(AEDB$sympt))
sympt_groups <- list(
  "TIA" = c(1, 7, 13),
  "Stroke" = c(2, 3),
  "Ocular" = c(4, 14, 15),
  "Retinal infarction" = c(8, 11),
  "Other" = c(5, 9, 10, 12, 16, 17)
)

# Symptoms.5G
AEDB[,"Symptoms.5G"] <- NA
AEDB$Symptoms.5G[sympt_code %in% 0] <- "Asymptomatic"
for (sympt_group in names(sympt_groups)) {
  AEDB$Symptoms.5G[sympt_code %in% sympt_groups[[sympt_group]]] <- sympt_group
}

# AsymptSympt
AEDB[,"AsymptSympt"] <- NA
AEDB$AsymptSympt[sympt_code %in% 0] <- "Asymptomatic"
AEDB$AsymptSympt[sympt_code %in% unlist(sympt_groups[c("TIA", "Stroke")])] <- "Symptomatic"
AEDB$AsymptSympt[sympt_code %in% unlist(sympt_groups[c("Ocular", "Retinal infarction", "Other")])] <- "Ocular and others"

# AsymptSympt2G
AEDB[,"AsymptSympt2G"] <- NA
AEDB$AsymptSympt2G[sympt_code %in% 0] <- "Asymptomatic"
AEDB$AsymptSympt2G[sympt_code %in% unlist(sympt_groups)] <- "Symptomatic"

rm(sympt_code, sympt_groups, sympt_group)
# table(AEDB$sympt, useNA = "ifany")
# table(AEDB$AsymptSympt2G, useNA = "ifany")
# table(AEDB$Symptoms.5G, useNA = "ifany")
#
# table(AEDB$AsymptSympt2G, AEDB$sympt, useNA = "ifany")
# table(AEDB$Symptoms.5G, AEDB$sympt, useNA = "ifany")
table(AEDB$AsymptSympt2G, AEDB$Symptoms.5G, useNA = "ifany")
Asymptomatic Ocular Other Retinal infarction Stroke TIA <NA>
Asymptomatic 333 0 0 0 0 0 0
Symptomatic 0 417 119 43 733 1045 0
<NA> 0 0 0 0 0 0 1103
# AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "sympt", "Symptoms.5G", "AsymptSympt"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
#
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
#
# table(AEDB.temp$Symptoms.5G, AEDB.temp$AsymptSympt)
#
# rm(AEDB.temp)We will also fix the plaquephenotypes variable.
Coding of plaque phenotypes is as follows:
# Fix plaquephenotypes
# Translate the numeric `plaquephenotype` code into a descriptive label.
# Rows with any other code (including -999) or a missing code keep the
# initial NA. The column is addressed explicitly instead of via attach(),
# which would leave AEDB on the search path if an error occurred.
AEDB[,"OverallPlaquePhenotype"] <- NA
AEDB$OverallPlaquePhenotype[AEDB$plaquephenotype == 1] <- "fibrous"
AEDB$OverallPlaquePhenotype[AEDB$plaquephenotype == 2] <- "fibroatheromatous"
AEDB$OverallPlaquePhenotype[AEDB$plaquephenotype == 3] <- "atheromatous"
table(AEDB$OverallPlaquePhenotype)
atheromatous fibroatheromatous fibrous
550 843 1439
# AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "plaquephenotype", "OverallPlaquePhenotype"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
#
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
#
# rm(AEDB.temp)We will also fix the diabetes status variable. We define diabetes as history of a diagnosis and/or use of glucose-lowering medications.
# Fix diabetes
# Translate the 0/1 `DM.composite` code into a descriptive label. Rows with
# any other code (including -999) or a missing code keep the initial NA.
# The column is addressed explicitly instead of via attach(), which would
# leave AEDB on the search path if an error occurred.
AEDB[,"DiabetesStatus"] <- NA
AEDB$DiabetesStatus[AEDB$DM.composite == 0] <- "Control (no Diabetes Dx/Med)"
AEDB$DiabetesStatus[AEDB$DM.composite == 1] <- "Diabetes"
table(AEDB$DM.composite)
0 1
2766 985
table(AEDB$DiabetesStatus)
Control (no Diabetes Dx/Med) Diabetes
2766 985
# AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "DM.composite", "DiabetesStatus"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$DiabetesStatus <- to_factor(AEDB.temp$DiabetesStatus)
#
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
#
# rm(AEDB.temp)We will also fix the smoking status variable. We are interested in whether someone never, ever or is currently (at the time of inclusion) smoking. This is based on the questionnaire.
diet801: are you a smoker?
diet802: did you smoke in the past?
We already have some variables indicating smoking status:
SmokingReported: patient has reported to smoke.SmokingYearOR: smoking in the year of surgery?SmokerCurrent: currently smoking?require(labelled)
AEDB$diet801 <- to_factor(AEDB$diet801)
AEDB$diet802 <- to_factor(AEDB$diet802)
AEDB$diet805 <- to_factor(AEDB$diet805)
AEDB$SmokingReported <- to_factor(AEDB$SmokingReported)
AEDB$SmokerCurrent <- to_factor(AEDB$SmokerCurrent)
AEDB$SmokingYearOR <- to_factor(AEDB$SmokingYearOR)
# table(AEDB$diet801)
# table(AEDB$diet802)
# table(AEDB$SmokingReported)
# table(AEDB$SmokerCurrent)
# table(AEDB$SmokingYearOR)
# table(AEDB$SmokingReported, AEDB$SmokerCurrent, useNA = "ifany", dnn = c("Reported smoking", "Current smoker"))
#
# table(AEDB$diet801, AEDB$diet802, useNA = "ifany", dnn = c("Smoker", "Past smoker"))
cat("\nFixing smoking status.\n")
Fixing smoking status.
# Combine the questionnaire item `diet802` with `SmokerCurrent` into a
# three-level smoking status. The rules are applied in order and later rules
# overwrite earlier ones, so the sequence matters.
# The columns are addressed explicitly instead of via attach(), which would
# leave AEDB on the search path if an error occurred.
AEDB[,"SmokerStatus"] <- NA
# NOTE(review): a "don't know" answer on diet802 is coded as "Never smoked"
# unless a later rule overwrites it - confirm that this is intended.
AEDB$SmokerStatus[AEDB$diet802 == "don't know"] <- "Never smoked"
AEDB$SmokerStatus[AEDB$diet802 == "I still smoke"] <- "Current smoker"
AEDB$SmokerStatus[AEDB$SmokerCurrent == "no" & AEDB$diet802 == "no"] <- "Never smoked"
AEDB$SmokerStatus[AEDB$SmokerCurrent == "no" & AEDB$diet802 == "yes"] <- "Ex-smoker"
AEDB$SmokerStatus[AEDB$SmokerCurrent == "yes"] <- "Current smoker"
AEDB$SmokerStatus[AEDB$SmokerCurrent == "no data available/missing"] <- NA
# AEDB$SmokerStatus[is.na(SmokerCurrent)] <- "Never smoked"
cat("\n* Current smoking status.\n")
* Current smoking status.
table(AEDB$SmokerCurrent,
useNA = "ifany",
dnn = c("Current smoker"))Current smoker
no data available/missing no yes <NA>
0 2364 1310 119
cat("\n* Updated smoking status.\n")
* Updated smoking status.
table(AEDB$SmokerStatus,
useNA = "ifany",
dnn = c("Updated smoking status"))Updated smoking status
Current smoker Ex-smoker Never smoked <NA>
1310 1814 389 280
cat("\n* Comparing to 'SmokerCurrent'.\n")
* Comparing to 'SmokerCurrent'.
table(AEDB$SmokerStatus, AEDB$SmokerCurrent,
useNA = "ifany",
dnn = c("Updated smoking status", "Current smoker")) Current smoker
Updated smoking status no data available/missing no yes <NA>
Current smoker 0 0 1310 0
Ex-smoker 0 1814 0 0
Never smoked 0 389 0 0
<NA> 0 161 0 119
# AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "DM.composite", "DiabetesStatus"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$DiabetesStatus <- to_factor(AEDB.temp$DiabetesStatus)
#
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
#
# rm(AEDB.temp)We will also fix the alcohol status variable.
# Fix alcohol use
# Translate the 0/1 `diet810` code into "No"/"Yes". Rows with any other code
# (including -999) or a missing code keep the initial NA.
# The column is addressed explicitly instead of via attach(), which would
# leave AEDB on the search path if an error occurred.
AEDB[,"AlcoholUse"] <- NA
AEDB$AlcoholUse[AEDB$diet810 == 0] <- "No"
AEDB$AlcoholUse[AEDB$diet810 == 1] <- "Yes"
table(AEDB$AlcoholUse)
No Yes
1238 2346
# AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "diet810", "AlcoholUse"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$AlcoholUse <- to_factor(AEDB.temp$AlcoholUse)
#
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
#
# rm(AEDB.temp)We will also fix a history of CAD, stroke or peripheral intervention status variable. This will be based on CAD_history, Stroke_history, and Peripheral.interv
# Fix history of cardiovascular disease
# MedHx_CVD is "yes" when any of CAD_history, Stroke_history or
# Peripheral.interv equals 1; otherwise "No" when at least one of them
# equals 0; otherwise NA. The second assignment overwrites the first.
# The columns are addressed explicitly instead of via attach(), which would
# leave AEDB on the search path if an error occurred.
#
# NOTE(review): because "No" only needs one of the three items to be 0, a
# patient with one 0 and the other items missing is coded "No" - confirm
# that this is intended. The labels also differ in case ("No" vs "yes");
# they are left unchanged because other code may match on them.
AEDB[,"MedHx_CVD"] <- NA
AEDB$MedHx_CVD[AEDB$CAD_history == 0 | AEDB$Stroke_history == 0 | AEDB$Peripheral.interv == 0] <- "No"
AEDB$MedHx_CVD[AEDB$CAD_history == 1 | AEDB$Stroke_history == 1 | AEDB$Peripheral.interv == 1] <- "yes"
table(AEDB$CAD_history)
0 1
2432 1285
table(AEDB$Stroke_history)
0 1
2764 948
table(AEDB$Peripheral.interv)
0 1
2581 1099
table(AEDB$MedHx_CVD)
No yes
1310 2476
# AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "diet810", "AlcoholUse"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$AlcoholUse <- to_factor(AEDB.temp$AlcoholUse)
#
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
#
# rm(AEDB.temp)We are interested in the following variables at baseline.
MCP1, and MCP1_pg_ml_2015)cat("===========================================================================================\n")===========================================================================================
cat("CREATE BASELINE TABLE\n")CREATE BASELINE TABLE
# Baseline table variables
# basetable_vars: every variable shown in the baseline table, in display order.
basetable_vars = c("Hospital", "ORyear",
"Age", "Gender",
"TC_finalCU", "LDL_finalCU", "HDL_finalCU", "TG_finalCU",
"TC_final", "LDL_final", "HDL_final", "TG_final",
"hsCRP_plasma",
"systolic", "diastoli", "GFR_MDRD", "BMI",
"KDOQI", "BMI_WHO",
"SmokerStatus", "AlcoholUse",
"DiabetesStatus",
"Hypertension.selfreport", "Hypertension.selfreportdrug", "Hypertension.composite", "Hypertension.drugs",
"Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD",
"Stroke_Dx", "sympt", "Symptoms.5G", "AsymptSympt", "AsymptSympt2G",
"restenos", "stenose",
"MedHx_CVD", "CAD_history", "PAOD", "Peripheral.interv",
"EP_composite", "EP_composite_time",
"macmean0", "smcmean0", "Macrophages.bin", "SMC.bin",
"neutrophils", "Mast_cells_plaque",
"IPH.bin", "vessel_density_averaged",
"Calc.bin", "Collagen.bin",
"Fat.bin_10", "Fat.bin_40", "OverallPlaquePhenotype",
"IL6", "IL6R_pg_ml_2015",
"MCP1", "MCP1_pg_ml_2015")
# basetable_bin: the subset of basetable_vars listed as categorical.
# NOTE(review): "Hospital", "ORyear" and "MedHx_CVD" are not listed here and
# therefore end up in basetable_con below - confirm whether that is intended.
basetable_bin = c("Gender",
"KDOQI", "BMI_WHO",
"SmokerStatus", "AlcoholUse",
"DiabetesStatus",
"Hypertension.selfreport", "Hypertension.selfreportdrug", "Hypertension.composite", "Hypertension.drugs",
"Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD",
"Stroke_Dx", "sympt", "Symptoms.5G", "AsymptSympt", "AsymptSympt2G",
"restenos", "stenose",
"CAD_history", "PAOD", "Peripheral.interv",
"EP_composite", "Macrophages.bin", "SMC.bin",
"IPH.bin",
"Calc.bin", "Collagen.bin",
"Fat.bin_10", "Fat.bin_40", "OverallPlaquePhenotype")
# basetable_bin
# basetable_con: all remaining variables, i.e. those in basetable_vars that
# are not listed in basetable_bin.
basetable_con = basetable_vars[!basetable_vars %in% basetable_bin]
# basetable_con
Showing the baseline table of the whole Athero-Express Biobank.
# Baseline characteristics of the whole Athero-Express Biobank (AEDB).
# CreateTableOne() summarises every variable in basetable_vars, counting NA as a
# separate level for categorical variables; print() renders the table and returns
# it as a matrix, of which the first three columns (level, Overall, Missing) are kept.
# Reference: http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
# Disabled options, kept for reference: factorVars = basetable_bin, strata = "Symptoms.4g"
AEDB.tableOne <- print(
  CreateTableOne(
    data = AEDB,
    vars = basetable_vars,
    includeNA = TRUE
  ),
  format = "pf",
  contDigits = 3,
  showAllLevels = TRUE,
  explain = TRUE,
  missing = TRUE,
  nonnormal = NULL,
  quote = FALSE,
  noSpaces = FALSE
)[, seq_len(3)]
level Overall Missing
n 3793
Hospital % (freq) St. Antonius, Nieuwegein 45.7 (1735) 0.0
UMC Utrecht 54.3 (2058)
ORyear % (freq) No data available/missing 0.0 ( 0) 0.0
2002 2.5 ( 94)
2003 5.4 ( 204)
2004 7.6 ( 289)
2005 8.1 ( 309)
2006 7.5 ( 285)
2007 6.2 ( 234)
2008 5.9 ( 223)
2009 7.1 ( 268)
2010 8.1 ( 307)
2011 7.1 ( 270)
2012 8.2 ( 312)
2013 6.9 ( 262)
2014 7.9 ( 299)
2015 2.1 ( 79)
2016 3.3 ( 124)
2017 2.2 ( 85)
2018 2.1 ( 80)
2019 1.8 ( 69)
Age (mean (SD)) 68.906 (9.322) 0.0
Gender % (freq) female 30.6 (1161) 0.0
male 69.4 (2632)
TC_finalCU (mean (SD)) 185.256 (81.509) 46.8
LDL_finalCU (mean (SD)) 106.533 (40.725) 54.5
HDL_finalCU (mean (SD)) 46.591 (16.725) 51.1
TG_finalCU (mean (SD)) 154.212 (99.774) 51.8
TC_final (mean (SD)) 4.798 (2.111) 46.8
LDL_final (mean (SD)) 2.759 (1.055) 54.5
HDL_final (mean (SD)) 1.207 (0.433) 51.1
TG_final (mean (SD)) 1.743 (1.127) 51.8
hsCRP_plasma (mean (SD)) 19.231 (206.750) 60.6
systolic (mean (SD)) 150.901 (25.114) 13.5
diastoli (mean (SD)) 79.933 (21.847) 13.5
GFR_MDRD (mean (SD)) 74.844 (24.740) 6.5
BMI (mean (SD)) 26.336 (4.050) 7.5
KDOQI % (freq) No data available/missing 0.0 ( 0) 6.6
Normal kidney function 22.1 ( 839)
CKD 2 (Mild) 47.2 (1789)
CKD 3 (Moderate) 21.9 ( 831)
CKD 4 (Severe) 1.4 ( 53)
CKD 5 (Failure) 0.8 ( 32)
<NA> 6.6 ( 249)
BMI_WHO % (freq) No data available/missing 0.0 ( 0) 7.5
Underweight 1.2 ( 44)
Normal 35.2 (1336)
Overweight 42.1 (1595)
Obese 14.1 ( 533)
<NA> 7.5 ( 285)
SmokerStatus % (freq) Current smoker 34.5 (1310) 7.4
Ex-smoker 47.8 (1814)
Never smoked 10.3 ( 389)
<NA> 7.4 ( 280)
AlcoholUse % (freq) No 32.6 (1238) 5.5
Yes 61.9 (2346)
<NA> 5.5 ( 209)
DiabetesStatus % (freq) Control (no Diabetes Dx/Med) 72.9 (2766) 1.1
Diabetes 26.0 ( 985)
<NA> 1.1 ( 42)
Hypertension.selfreport % (freq) No data available/missing 0.0 ( 0) 4.0
no 23.7 ( 900)
yes 72.3 (2742)
<NA> 4.0 ( 151)
Hypertension.selfreportdrug % (freq) No data available/missing 0.0 ( 0) 5.5
no 28.6 (1086)
yes 65.9 (2500)
<NA> 5.5 ( 207)
Hypertension.composite % (freq) No data available/missing 0.0 ( 0) 1.3
no 13.3 ( 505)
yes 85.4 (3240)
<NA> 1.3 ( 48)
Hypertension.drugs % (freq) No data available/missing 0.0 ( 0) 1.5
no 21.0 ( 798)
yes 77.5 (2940)
<NA> 1.5 ( 55)
Med.anticoagulants % (freq) No data available/missing 0.0 ( 0) 1.6
no 85.6 (3248)
yes 12.8 ( 485)
<NA> 1.6 ( 60)
Med.all.antiplatelet % (freq) No data available/missing 0.0 ( 0) 1.6
no 13.7 ( 521)
yes 84.7 (3213)
<NA> 1.6 ( 59)
Med.Statin.LLD % (freq) No data available/missing 0.0 ( 0) 1.5
no 21.8 ( 826)
yes 76.7 (2911)
<NA> 1.5 ( 56)
Stroke_Dx % (freq) Missing 0.0 ( 0) 8.1
No stroke diagnosed 74.4 (2823)
Stroke diagnosed 17.5 ( 663)
<NA> 8.1 ( 307)
sympt % (freq) missing 29.1 (1103) 0.0
Asymptomatic 8.8 ( 333)
TIA 27.4 (1040)
minor stroke 12.1 ( 458)
Major stroke 7.3 ( 275)
Amaurosis fugax 10.5 ( 399)
Four vessel disease 1.1 ( 43)
Vertebrobasilary TIA 0.1 ( 5)
Retinal infarction 1.0 ( 37)
Symptomatic, but aspecific symtoms 1.6 ( 61)
Contralateral symptomatic occlusion 0.3 ( 12)
retinal infarction 0.2 ( 6)
armclaudication due to occlusion subclavian artery, CEA needed for bypass 0.0 ( 1)
retinal infarction + TIAs 0.0 ( 0)
Ocular ischemic syndrome 0.5 ( 18)
ischemisch glaucoom 0.0 ( 0)
subclavian steal syndrome 0.1 ( 2)
TGA 0.0 ( 0)
Symptoms.5G % (freq) Asymptomatic 8.8 ( 333) 29.1
Ocular 11.0 ( 417)
Other 3.1 ( 119)
Retinal infarction 1.1 ( 43)
Stroke 19.3 ( 733)
TIA 27.6 (1045)
<NA> 29.1 (1103)
AsymptSympt % (freq) Asymptomatic 8.8 ( 333) 29.1
Ocular and others 15.3 ( 579)
Symptomatic 46.9 (1778)
<NA> 29.1 (1103)
AsymptSympt2G % (freq) Asymptomatic 8.8 ( 333) 29.1
Symptomatic 62.1 (2357)
<NA> 29.1 (1103)
restenos % (freq) missing 0.0 ( 0) 4.0
de novo 87.0 (3299)
restenosis 8.8 ( 334)
stenose bij angioseal na PTCA 0.2 ( 7)
<NA> 4.0 ( 153)
stenose % (freq) missing 0.0 ( 0) 7.0
0-49% 0.7 ( 25)
50-70% 6.8 ( 257)
70-90% 35.6 (1349)
90-99% 29.9 (1133)
100% (Occlusion) 14.8 ( 560)
NA 0.1 ( 3)
50-99% 2.6 ( 99)
70-99% 2.6 ( 100)
99 0.1 ( 2)
<NA> 7.0 ( 265)
MedHx_CVD % (freq) No 34.5 (1310) 0.2
yes 65.3 (2476)
<NA> 0.2 ( 7)
CAD_history % (freq) Missing 0.0 ( 0) 2.0
No history CAD 64.1 (2432)
History CAD 33.9 (1285)
<NA> 2.0 ( 76)
PAOD % (freq) missing/no data 0.0 ( 0) 1.6
no 55.1 (2090)
yes 43.3 (1644)
<NA> 1.6 ( 59)
Peripheral.interv % (freq) no 68.0 (2581) 3.0
yes 29.0 (1099)
<NA> 3.0 ( 113)
EP_composite % (freq) No data available. 0.0 ( 0) 7.3
No composite endpoints 60.6 (2299)
Composite endpoints 32.1 (1218)
<NA> 7.3 ( 276)
EP_composite_time (mean (SD)) 2.267 (1.203) 7.4
macmean0 (mean (SD)) 0.656 (1.154) 32.4
smcmean0 (mean (SD)) 2.292 (6.618) 32.4
Macrophages.bin % (freq) no/minor 42.3 (1603) 25.7
moderate/heavy 32.1 (1216)
<NA> 25.7 ( 974)
SMC.bin % (freq) no/minor 22.9 ( 870) 25.3
moderate/heavy 51.8 (1964)
<NA> 25.3 ( 959)
neutrophils (mean (SD)) 162.985 (490.469) 91.0
Mast_cells_plaque (mean (SD)) 165.663 (163.421) 93.0
IPH.bin % (freq) no 32.3 (1225) 24.8
yes 42.9 (1628)
<NA> 24.8 ( 940)
vessel_density_averaged (mean (SD)) 8.030 (6.344) 48.0
Calc.bin % (freq) no/minor 37.9 (1438) 24.7
moderate/heavy 37.4 (1417)
<NA> 24.7 ( 938)
Collagen.bin % (freq) no/minor 14.2 ( 540) 25.2
moderate/heavy 60.6 (2299)
<NA> 25.2 ( 954)
Fat.bin_10 % (freq) <10% 32.3 (1226) 24.7
>10% 43.0 (1630)
<NA> 24.7 ( 937)
Fat.bin_40 % (freq) <40% 60.0 (2276) 24.7
>40% 15.3 ( 580)
<NA> 24.7 ( 937)
OverallPlaquePhenotype % (freq) atheromatous 14.5 ( 550) 25.3
fibroatheromatous 22.2 ( 843)
fibrous 37.9 (1439)
<NA> 25.3 ( 961)
IL6 (mean (SD)) 94.451 (278.490) 84.5
IL6R_pg_ml_2015 (mean (SD)) 219.949 (252.513) 67.0
MCP1 (mean (SD)) 130.926 (118.422) 83.7
MCP1_pg_ml_2015 (mean (SD)) 587.541 (843.110) 65.3
Showing the baseline table of the CEA patients in the Athero-Express Biobank.
# Baseline characteristics of the CEA patients (AEDB.CEA).
# Same layout as the whole-biobank table: all basetable_vars, NA counted as its
# own level, and only the first three printed columns (level, Overall, Missing) kept.
# Reference: http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
# Disabled options, kept for reference: factorVars = basetable_bin, strata = "Symptoms.4g"
AEDB.CEA.tableOne <- print(
  CreateTableOne(
    data = AEDB.CEA,
    vars = basetable_vars,
    includeNA = TRUE
  ),
  format = "pf",
  contDigits = 3,
  showAllLevels = TRUE,
  explain = TRUE,
  missing = TRUE,
  nonnormal = NULL,
  quote = FALSE,
  noSpaces = FALSE
)[, seq_len(3)]
level Overall Missing
n 2423
Hospital % (freq) St. Antonius, Nieuwegein 39.1 ( 948) 0.0
UMC Utrecht 60.9 (1475)
ORyear % (freq) No data available/missing 0.0 ( 0) 0.0
2002 3.3 ( 81)
2003 6.5 ( 157)
2004 7.8 ( 190)
2005 7.6 ( 185)
2006 7.6 ( 183)
2007 6.3 ( 152)
2008 5.7 ( 138)
2009 7.5 ( 182)
2010 6.6 ( 159)
2011 6.8 ( 164)
2012 7.3 ( 176)
2013 6.1 ( 149)
2014 6.7 ( 163)
2015 3.1 ( 76)
2016 3.5 ( 85)
2017 2.7 ( 65)
2018 2.7 ( 66)
2019 2.1 ( 52)
Age (mean (SD)) 69.103 (9.302) 0.0
Gender % (freq) female 30.5 ( 739) 0.0
male 69.5 (1684)
TC_finalCU (mean (SD)) 184.852 (56.275) 38.0
LDL_finalCU (mean (SD)) 108.484 (41.794) 45.6
HDL_finalCU (mean (SD)) 46.432 (16.999) 41.7
TG_finalCU (mean (SD)) 151.189 (91.249) 42.8
TC_final (mean (SD)) 4.788 (1.458) 38.0
LDL_final (mean (SD)) 2.810 (1.082) 45.6
HDL_final (mean (SD)) 1.203 (0.440) 41.7
TG_final (mean (SD)) 1.708 (1.031) 42.8
hsCRP_plasma (mean (SD)) 19.887 (231.453) 52.9
systolic (mean (SD)) 152.408 (25.163) 11.3
diastoli (mean (SD)) 81.314 (25.178) 11.3
GFR_MDRD (mean (SD)) 73.115 (21.145) 5.4
BMI (mean (SD)) 26.488 (3.976) 5.9
KDOQI % (freq) No data available/missing 0.0 ( 0) 5.4
Normal kidney function 19.1 ( 462)
CKD 2 (Mild) 50.9 (1233)
CKD 3 (Moderate) 22.9 ( 554)
CKD 4 (Severe) 1.3 ( 32)
CKD 5 (Failure) 0.4 ( 10)
<NA> 5.4 ( 132)
BMI_WHO % (freq) No data available/missing 0.0 ( 0) 5.9
Underweight 1.0 ( 24)
Normal 35.1 ( 851)
Overweight 43.4 (1052)
Obese 14.5 ( 352)
<NA> 5.9 ( 144)
SmokerStatus % (freq) Current smoker 33.2 ( 805) 5.9
Ex-smoker 48.0 (1163)
Never smoked 12.9 ( 313)
<NA> 5.9 ( 142)
AlcoholUse % (freq) No 34.5 ( 835) 4.1
Yes 61.5 (1489)
<NA> 4.1 ( 99)
DiabetesStatus % (freq) Control (no Diabetes Dx/Med) 75.2 (1822) 1.1
Diabetes 23.7 ( 574)
<NA> 1.1 ( 27)
Hypertension.selfreport % (freq) No data available/missing 0.0 ( 0) 3.2
no 24.3 ( 590)
yes 72.4 (1755)
<NA> 3.2 ( 78)
Hypertension.selfreportdrug % (freq) No data available/missing 0.0 ( 0) 4.4
no 30.0 ( 726)
yes 65.6 (1590)
<NA> 4.4 ( 107)
Hypertension.composite % (freq) No data available/missing 0.0 ( 0) 1.2
no 14.6 ( 354)
yes 84.2 (2041)
<NA> 1.2 ( 28)
Hypertension.drugs % (freq) No data available/missing 0.0 ( 0) 1.4
no 23.4 ( 566)
yes 75.3 (1824)
<NA> 1.4 ( 33)
Med.anticoagulants % (freq) No data available/missing 0.0 ( 0) 1.6
no 87.3 (2116)
yes 11.1 ( 269)
<NA> 1.6 ( 38)
Med.all.antiplatelet % (freq) No data available/missing 0.0 ( 0) 1.5
no 12.2 ( 295)
yes 86.3 (2092)
<NA> 1.5 ( 36)
Med.Statin.LLD % (freq) No data available/missing 0.0 ( 0) 1.4
no 20.3 ( 491)
yes 78.3 (1898)
<NA> 1.4 ( 34)
Stroke_Dx % (freq) Missing 0.0 ( 0) 6.9
No stroke diagnosed 71.5 (1732)
Stroke diagnosed 21.7 ( 525)
<NA> 6.9 ( 166)
sympt % (freq) missing 0.0 ( 0) 0.0
Asymptomatic 11.1 ( 270)
TIA 39.7 ( 961)
minor stroke 16.8 ( 407)
Major stroke 9.9 ( 239)
Amaurosis fugax 15.7 ( 380)
Four vessel disease 1.6 ( 38)
Vertebrobasilary TIA 0.2 ( 5)
Retinal infarction 1.4 ( 34)
Symptomatic, but aspecific symtoms 2.2 ( 53)
Contralateral symptomatic occlusion 0.5 ( 11)
retinal infarction 0.2 ( 6)
armclaudication due to occlusion subclavian artery, CEA needed for bypass 0.0 ( 1)
retinal infarction + TIAs 0.0 ( 0)
Ocular ischemic syndrome 0.7 ( 16)
ischemisch glaucoom 0.0 ( 0)
subclavian steal syndrome 0.1 ( 2)
TGA 0.0 ( 0)
Symptoms.5G % (freq) Asymptomatic 11.1 ( 270) 0.0
Ocular 16.3 ( 396)
Other 4.3 ( 105)
Retinal infarction 1.7 ( 40)
Stroke 26.7 ( 646)
TIA 39.9 ( 966)
AsymptSympt % (freq) Asymptomatic 11.1 ( 270) 0.0
Ocular and others 22.3 ( 541)
Symptomatic 66.5 (1612)
AsymptSympt2G % (freq) Asymptomatic 11.1 ( 270) 0.0
Symptomatic 88.9 (2153)
restenos % (freq) missing 0.0 ( 0) 1.4
de novo 93.7 (2270)
restenosis 4.9 ( 118)
stenose bij angioseal na PTCA 0.0 ( 0)
<NA> 1.4 ( 35)
stenose % (freq) missing 0.0 ( 0) 2.0
0-49% 0.5 ( 13)
50-70% 7.8 ( 190)
70-90% 46.5 (1127)
90-99% 38.3 ( 928)
100% (Occlusion) 1.3 ( 31)
NA 0.0 ( 1)
50-99% 0.6 ( 15)
70-99% 2.8 ( 68)
99 0.1 ( 2)
<NA> 2.0 ( 48)
MedHx_CVD % (freq) No 36.9 ( 893) 0.0
yes 63.1 (1530)
CAD_history % (freq) Missing 0.0 ( 0) 1.9
No history CAD 66.9 (1620)
History CAD 31.2 ( 756)
<NA> 1.9 ( 47)
PAOD % (freq) missing/no data 0.0 ( 0) 2.0
no 77.5 (1878)
yes 20.5 ( 497)
<NA> 2.0 ( 48)
Peripheral.interv % (freq) no 77.2 (1870) 2.9
yes 19.9 ( 482)
<NA> 2.9 ( 71)
EP_composite % (freq) No data available. 0.0 ( 0) 5.0
No composite endpoints 70.6 (1711)
Composite endpoints 24.3 ( 590)
<NA> 5.0 ( 122)
EP_composite_time (mean (SD)) 2.479 (1.109) 5.2
macmean0 (mean (SD)) 0.767 (1.183) 29.7
smcmean0 (mean (SD)) 1.985 (2.380) 29.9
Macrophages.bin % (freq) no/minor 35.0 ( 847) 24.1
moderate/heavy 40.9 ( 992)
<NA> 24.1 ( 584)
SMC.bin % (freq) no/minor 24.8 ( 602) 23.8
moderate/heavy 51.3 (1244)
<NA> 23.8 ( 577)
neutrophils (mean (SD)) 147.151 (419.998) 87.5
Mast_cells_plaque (mean (SD)) 164.488 (163.771) 90.0
IPH.bin % (freq) no 30.8 ( 746) 23.5
yes 45.7 (1108)
<NA> 23.5 ( 569)
vessel_density_averaged (mean (SD)) 8.317 (6.384) 35.1
Calc.bin % (freq) no/minor 41.6 (1007) 23.4
moderate/heavy 35.1 ( 850)
<NA> 23.4 ( 566)
Collagen.bin % (freq) no/minor 15.8 ( 382) 23.6
moderate/heavy 60.6 (1469)
<NA> 23.6 ( 572)
Fat.bin_10 % (freq) <10% 22.4 ( 542) 23.3
>10% 54.3 (1316)
<NA> 23.3 ( 565)
Fat.bin_40 % (freq) <40% 56.2 (1362) 23.3
>40% 20.5 ( 496)
<NA> 23.3 ( 565)
OverallPlaquePhenotype % (freq) atheromatous 19.8 ( 480) 23.7
fibroatheromatous 27.8 ( 674)
fibrous 28.7 ( 695)
<NA> 23.7 ( 574)
IL6 (mean (SD)) 98.812 (292.457) 78.2
IL6R_pg_ml_2015 (mean (SD)) 217.355 (248.551) 52.4
MCP1 (mean (SD)) 135.763 (120.028) 76.7
MCP1_pg_ml_2015 (mean (SD)) 600.444 (858.416) 50.5
MCP1_pg_ml_2015
Showing the baseline table of the CEA patients in the Athero-Express Biobank with MCP1_pg_ml_2015 measurements, stratified by symptom status (AsymptSympt2G).
# Restrict the CEA patients to those with a non-missing MCP1_pg_ml_2015 value.
AEDB.CEA.subset <- subset(AEDB.CEA, subset = !is.na(MCP1_pg_ml_2015))

# Baseline table of that subset, stratified by AsymptSympt2G. The six columns kept
# are: level, Asymptomatic, Symptomatic, p, test, Missing.
# NOTE(review): in the rendered output the p-value is NaN for factors that carry an
# empty level (e.g. "No data available/missing"); dropping unused levels first would
# presumably avoid that - confirm before changing.
# Disabled option, kept for reference: factorVars = basetable_bin
AEDB.CEA.subset.AsymptSympt.tableOne <- print(
  CreateTableOne(
    data = AEDB.CEA.subset,
    vars = basetable_vars,
    strata = "AsymptSympt2G",
    includeNA = TRUE
  ),
  format = "pf",
  contDigits = 3,
  showAllLevels = TRUE,
  explain = TRUE,
  missing = TRUE,
  nonnormal = NULL,
  quote = FALSE,
  noSpaces = FALSE
)[, seq_len(6)]
Stratified by AsymptSympt2G
level Asymptomatic
n 131
Hospital % (freq) St. Antonius, Nieuwegein 50.4 ( 66)
UMC Utrecht 49.6 ( 65)
ORyear % (freq) No data available/missing 0.0 ( 0)
2002 10.7 ( 14)
2003 7.6 ( 10)
2004 17.6 ( 23)
2005 9.9 ( 13)
2006 10.7 ( 14)
2007 11.5 ( 15)
2008 7.6 ( 10)
2009 7.6 ( 10)
2010 5.3 ( 7)
2011 6.1 ( 8)
2012 5.3 ( 7)
2013 0.0 ( 0)
2014 0.0 ( 0)
2015 0.0 ( 0)
2016 0.0 ( 0)
2017 0.0 ( 0)
2018 0.0 ( 0)
2019 0.0 ( 0)
Age (mean (SD)) 66.237 (9.184)
Gender % (freq) female 23.7 ( 31)
male 76.3 (100)
TC_finalCU (mean (SD)) 175.987 (47.184)
LDL_finalCU (mean (SD)) 102.781 (38.324)
HDL_finalCU (mean (SD)) 43.701 (14.754)
TG_finalCU (mean (SD)) 157.650 (89.246)
TC_final (mean (SD)) 4.558 (1.222)
LDL_final (mean (SD)) 2.662 (0.993)
HDL_final (mean (SD)) 1.132 (0.382)
TG_final (mean (SD)) 1.781 (1.008)
hsCRP_plasma (mean (SD)) 5.688 (19.440)
systolic (mean (SD)) 153.577 (24.327)
diastoli (mean (SD)) 80.622 (13.225)
GFR_MDRD (mean (SD)) 71.026 (20.424)
BMI (mean (SD)) 26.623 (3.391)
KDOQI % (freq) No data available/missing 0.0 ( 0)
Normal kidney function 17.6 ( 23)
CKD 2 (Mild) 49.6 ( 65)
CKD 3 (Moderate) 28.2 ( 37)
CKD 4 (Severe) 0.0 ( 0)
CKD 5 (Failure) 0.8 ( 1)
<NA> 3.8 ( 5)
BMI_WHO % (freq) No data available/missing 0.0 ( 0)
Underweight 0.8 ( 1)
Normal 32.8 ( 43)
Overweight 51.1 ( 67)
Obese 13.0 ( 17)
<NA> 2.3 ( 3)
SmokerStatus % (freq) Current smoker 30.5 ( 40)
Ex-smoker 57.3 ( 75)
Never smoked 9.9 ( 13)
<NA> 2.3 ( 3)
AlcoholUse % (freq) No 38.2 ( 50)
Yes 59.5 ( 78)
<NA> 2.3 ( 3)
DiabetesStatus % (freq) Control (no Diabetes Dx/Med) 76.3 (100)
Diabetes 23.7 ( 31)
Hypertension.selfreport % (freq) No data available/missing 0.0 ( 0)
no 23.7 ( 31)
yes 75.6 ( 99)
<NA> 0.8 ( 1)
Hypertension.selfreportdrug % (freq) No data available/missing 0.0 ( 0)
no 30.5 ( 40)
yes 67.9 ( 89)
<NA> 1.5 ( 2)
Hypertension.composite % (freq) No data available/missing 0.0 ( 0)
no 9.9 ( 13)
yes 90.1 (118)
Hypertension.drugs % (freq) No data available/missing 0.0 ( 0)
no 14.5 ( 19)
yes 85.5 (112)
<NA> 0.0 ( 0)
Med.anticoagulants % (freq) No data available/missing 0.0 ( 0)
no 89.3 (117)
yes 10.7 ( 14)
<NA> 0.0 ( 0)
Med.all.antiplatelet % (freq) No data available/missing 0.0 ( 0)
no 6.1 ( 8)
yes 93.1 (122)
<NA> 0.8 ( 1)
Med.Statin.LLD % (freq) No data available/missing 0.0 ( 0)
no 15.3 ( 20)
yes 84.7 (111)
<NA> 0.0 ( 0)
Stroke_Dx % (freq) Missing 0.0 ( 0)
No stroke diagnosed 80.2 (105)
Stroke diagnosed 14.5 ( 19)
<NA> 5.3 ( 7)
sympt % (freq) missing 0.0 ( 0)
Asymptomatic 100.0 (131)
TIA 0.0 ( 0)
minor stroke 0.0 ( 0)
Major stroke 0.0 ( 0)
Amaurosis fugax 0.0 ( 0)
Four vessel disease 0.0 ( 0)
Vertebrobasilary TIA 0.0 ( 0)
Retinal infarction 0.0 ( 0)
Symptomatic, but aspecific symtoms 0.0 ( 0)
Contralateral symptomatic occlusion 0.0 ( 0)
retinal infarction 0.0 ( 0)
armclaudication due to occlusion subclavian artery, CEA needed for bypass 0.0 ( 0)
retinal infarction + TIAs 0.0 ( 0)
Ocular ischemic syndrome 0.0 ( 0)
ischemisch glaucoom 0.0 ( 0)
subclavian steal syndrome 0.0 ( 0)
TGA 0.0 ( 0)
Symptoms.5G % (freq) Asymptomatic 100.0 (131)
Ocular 0.0 ( 0)
Other 0.0 ( 0)
Retinal infarction 0.0 ( 0)
Stroke 0.0 ( 0)
TIA 0.0 ( 0)
AsymptSympt % (freq) Asymptomatic 100.0 (131)
Ocular and others 0.0 ( 0)
Symptomatic 0.0 ( 0)
AsymptSympt2G % (freq) Asymptomatic 100.0 (131)
Symptomatic 0.0 ( 0)
restenos % (freq) missing 0.0 ( 0)
de novo 93.9 (123)
restenosis 2.3 ( 3)
stenose bij angioseal na PTCA 0.0 ( 0)
<NA> 3.8 ( 5)
stenose % (freq) missing 0.0 ( 0)
0-49% 0.0 ( 0)
50-70% 3.1 ( 4)
70-90% 51.1 ( 67)
90-99% 41.2 ( 54)
100% (Occlusion) 0.0 ( 0)
NA 0.0 ( 0)
50-99% 0.8 ( 1)
70-99% 0.0 ( 0)
99 0.0 ( 0)
<NA> 3.8 ( 5)
MedHx_CVD % (freq) No 38.9 ( 51)
yes 61.1 ( 80)
CAD_history % (freq) Missing 0.0 ( 0)
No history CAD 61.8 ( 81)
History CAD 38.2 ( 50)
PAOD % (freq) missing/no data 0.0 ( 0)
no 74.0 ( 97)
yes 26.0 ( 34)
Peripheral.interv % (freq) no 74.0 ( 97)
yes 26.0 ( 34)
<NA> 0.0 ( 0)
EP_composite % (freq) No data available. 0.0 ( 0)
No composite endpoints 67.2 ( 88)
Composite endpoints 32.8 ( 43)
<NA> 0.0 ( 0)
EP_composite_time (mean (SD)) 2.614 (0.931)
macmean0 (mean (SD)) 0.837 (1.088)
smcmean0 (mean (SD)) 2.152 (1.861)
Macrophages.bin % (freq) no/minor 48.9 ( 64)
moderate/heavy 50.4 ( 66)
<NA> 0.8 ( 1)
SMC.bin % (freq) no/minor 22.9 ( 30)
moderate/heavy 75.6 ( 99)
<NA> 1.5 ( 2)
neutrophils (mean (SD)) 157.643 (507.380)
Mast_cells_plaque (mean (SD)) 111.400 (112.037)
IPH.bin % (freq) no 41.2 ( 54)
yes 58.0 ( 76)
<NA> 0.8 ( 1)
vessel_density_averaged (mean (SD)) 8.608 (6.547)
Stratified by AsymptSympt2G
Symptomatic p test Missing
n 1068
Hospital % (freq) 46.4 ( 496) 0.447 0.0
53.6 ( 572)
ORyear % (freq) 0.0 ( 0) NaN 0.0
3.9 ( 42)
9.4 ( 100)
11.5 ( 123)
11.1 ( 119)
10.2 ( 109)
10.5 ( 112)
7.4 ( 79)
8.4 ( 90)
7.6 ( 81)
9.6 ( 102)
8.3 ( 89)
2.0 ( 21)
0.1 ( 1)
0.0 ( 0)
0.0 ( 0)
0.0 ( 0)
0.0 ( 0)
0.0 ( 0)
Age (mean (SD)) 68.940 (9.115) 0.001 0.0
Gender % (freq) 31.4 ( 335) 0.088 0.0
68.6 ( 733)
TC_finalCU (mean (SD)) 183.526 (48.426) 0.174 33.5
LDL_finalCU (mean (SD)) 109.377 (41.109) 0.183 39.7
HDL_finalCU (mean (SD)) 45.809 (18.513) 0.318 36.4
TG_finalCU (mean (SD)) 145.194 (84.818) 0.209 36.1
TC_final (mean (SD)) 4.753 (1.254) 0.174 33.5
LDL_final (mean (SD)) 2.833 (1.065) 0.183 39.7
HDL_final (mean (SD)) 1.186 (0.479) 0.318 36.4
TG_final (mean (SD)) 1.641 (0.958) 0.209 36.1
hsCRP_plasma (mean (SD)) 16.551 (113.708) 0.380 38.8
systolic (mean (SD)) 155.790 (26.176) 0.397 14.0
diastoli (mean (SD)) 82.883 (13.573) 0.097 14.0
GFR_MDRD (mean (SD)) 71.866 (20.055) 0.658 3.6
BMI (mean (SD)) 26.323 (3.744) 0.386 4.2
KDOQI % (freq) 0.0 ( 0) NaN 3.7
17.2 ( 184)
53.2 ( 568)
24.3 ( 260)
1.2 ( 13)
0.4 ( 4)
3.7 ( 39)
BMI_WHO % (freq) 0.0 ( 0) NaN 4.3
0.9 ( 10)
35.6 ( 380)
46.2 ( 493)
12.7 ( 136)
4.6 ( 49)
SmokerStatus % (freq) 36.2 ( 387) 0.077 3.8
45.6 ( 487)
14.2 ( 152)
3.9 ( 42)
AlcoholUse % (freq) 33.3 ( 356) 0.347 4.1
62.4 ( 666)
4.3 ( 46)
DiabetesStatus % (freq) 77.3 ( 826) 0.882 0.0
22.7 ( 242)
Hypertension.selfreport % (freq) 0.0 ( 0) NaN 2.0
26.7 ( 285)
71.2 ( 760)
2.2 ( 23)
Hypertension.selfreportdrug % (freq) 0.0 ( 0) NaN 2.7
33.0 ( 352)
64.2 ( 686)
2.8 ( 30)
Hypertension.composite % (freq) 0.0 ( 0) NaN 0.0
14.3 ( 153)
85.7 ( 915)
Hypertension.drugs % (freq) 0.0 ( 0) NaN 0.2
23.3 ( 249)
76.5 ( 817)
0.2 ( 2)
Med.anticoagulants % (freq) 0.0 ( 0) NaN 0.2
87.9 ( 939)
11.9 ( 127)
0.2 ( 2)
Med.all.antiplatelet % (freq) 0.0 ( 0) NaN 0.4
11.0 ( 118)
88.6 ( 946)
0.4 ( 4)
Med.Statin.LLD % (freq) 0.0 ( 0) NaN 0.2
22.7 ( 242)
77.2 ( 824)
0.2 ( 2)
Stroke_Dx % (freq) 0.0 ( 0) NaN 5.3
75.2 ( 803)
19.5 ( 208)
5.3 ( 57)
sympt % (freq) 0.0 ( 0) NaN 0.0
0.0 ( 0)
46.3 ( 494)
16.7 ( 178)
12.3 ( 131)
17.2 ( 184)
2.2 ( 23)
0.2 ( 2)
1.4 ( 15)
2.7 ( 29)
0.7 ( 7)
0.3 ( 3)
0.1 ( 1)
0.0 ( 0)
0.1 ( 1)
0.0 ( 0)
0.0 ( 0)
0.0 ( 0)
Symptoms.5G % (freq) 0.0 ( 0) <0.001 0.0
17.3 ( 185)
5.6 ( 60)
1.7 ( 18)
28.9 ( 309)
46.4 ( 496)
AsymptSympt % (freq) 0.0 ( 0) <0.001 0.0
24.6 ( 263)
75.4 ( 805)
AsymptSympt2G % (freq) 0.0 ( 0) <0.001 0.0
100.0 (1068)
restenos % (freq) 0.0 ( 0) NaN 2.3
94.8 (1012)
3.2 ( 34)
0.0 ( 0)
2.1 ( 22)
stenose % (freq) 0.0 ( 0) NaN 3.2
0.6 ( 6)
6.5 ( 69)
44.5 ( 475)
42.7 ( 456)
0.9 ( 10)
0.0 ( 0)
0.5 ( 5)
1.3 ( 14)
0.0 ( 0)
3.1 ( 33)
MedHx_CVD % (freq) 36.9 ( 394) 0.719 0.0
63.1 ( 674)
CAD_history % (freq) 0.0 ( 0) NaN 0.0
69.9 ( 746)
30.1 ( 322)
PAOD % (freq) 0.0 ( 0) NaN 0.0
79.6 ( 850)
20.4 ( 218)
Peripheral.interv % (freq) 82.5 ( 881) 0.043 0.3
17.2 ( 184)
0.3 ( 3)
EP_composite % (freq) 0.0 ( 0) NaN 0.8
74.3 ( 793)
24.9 ( 266)
0.8 ( 9)
EP_composite_time (mean (SD)) 2.613 (1.094) 0.992 0.9
macmean0 (mean (SD)) 0.780 (1.229) 0.616 2.3
smcmean0 (mean (SD)) 1.904 (2.220) 0.223 2.7
Macrophages.bin % (freq) 47.5 ( 507) 0.586 1.9
50.5 ( 539)
2.1 ( 22)
SMC.bin % (freq) 32.1 ( 343) 0.088 1.8
66.0 ( 705)
1.9 ( 20)
neutrophils (mean (SD)) 172.872 (477.038) 0.876 82.0
Mast_cells_plaque (mean (SD)) 183.284 (180.156) 0.056 86.2
IPH.bin % (freq) 38.1 ( 407) 0.577 1.7
60.1 ( 642)
1.8 ( 19)
vessel_density_averaged (mean (SD)) 8.403 (6.461) 0.744 8.7
[ reached getOption("max.print") -- omitted 20 rows ]
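MCP1_pg_ml_2015 and MCP1
Showing the baseline table of the CEA patients in the Athero-Express Biobank with MCP1_pg_ml_2015 and/or MCP1 measurements (i.e., at least one of the two is available), stratified by symptom status (AsymptSympt2G).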
# Restrict the CEA patients to those with at least one of the two MCP1
# measurements available (MCP1_pg_ml_2015 or MCP1).
AEDB.CEA.subset.combo <- subset(
  AEDB.CEA,
  subset = !is.na(MCP1_pg_ml_2015) | !is.na(MCP1)
)

# Baseline table of that subset, stratified by AsymptSympt2G. The six columns kept
# are: level, Asymptomatic, Symptomatic, p, test, Missing.
# NOTE(review): in the rendered output the p-value is NaN for factors that carry an
# empty level (e.g. "No data available/missing"); dropping unused levels first would
# presumably avoid that - confirm before changing.
# Disabled option, kept for reference: factorVars = basetable_bin
AEDB.CEA.subset.combo.tableOne <- print(
  CreateTableOne(
    data = AEDB.CEA.subset.combo,
    vars = basetable_vars,
    strata = "AsymptSympt2G",
    includeNA = TRUE
  ),
  format = "pf",
  contDigits = 3,
  showAllLevels = TRUE,
  explain = TRUE,
  missing = TRUE,
  nonnormal = NULL,
  quote = FALSE,
  noSpaces = FALSE
)[, seq_len(6)]
Stratified by AsymptSympt2G
level Asymptomatic
n 161
Hospital % (freq) St. Antonius, Nieuwegein 52.2 ( 84)
UMC Utrecht 47.8 ( 77)
ORyear % (freq) No data available/missing 0.0 ( 0)
2002 10.6 ( 17)
2003 11.8 ( 19)
2004 19.9 ( 32)
2005 13.7 ( 22)
2006 8.7 ( 14)
2007 9.3 ( 15)
2008 6.2 ( 10)
2009 6.2 ( 10)
2010 4.3 ( 7)
2011 5.0 ( 8)
2012 4.3 ( 7)
2013 0.0 ( 0)
2014 0.0 ( 0)
2015 0.0 ( 0)
2016 0.0 ( 0)
2017 0.0 ( 0)
2018 0.0 ( 0)
2019 0.0 ( 0)
Age (mean (SD)) 65.901 (9.051)
Gender % (freq) female 23.0 ( 37)
male 77.0 (124)
TC_finalCU (mean (SD)) 179.199 (45.274)
LDL_finalCU (mean (SD)) 104.132 (37.590)
HDL_finalCU (mean (SD)) 44.749 (14.890)
TG_finalCU (mean (SD)) 158.699 (87.584)
TC_final (mean (SD)) 4.641 (1.173)
LDL_final (mean (SD)) 2.697 (0.974)
HDL_final (mean (SD)) 1.159 (0.386)
TG_final (mean (SD)) 1.793 (0.990)
hsCRP_plasma (mean (SD)) 6.846 (21.838)
systolic (mean (SD)) 152.838 (24.600)
diastoli (mean (SD)) 80.824 (12.855)
GFR_MDRD (mean (SD)) 70.440 (19.793)
BMI (mean (SD)) 26.626 (3.572)
KDOQI % (freq) No data available/missing 0.0 ( 0)
Normal kidney function 14.9 ( 24)
CKD 2 (Mild) 50.9 ( 82)
CKD 3 (Moderate) 29.8 ( 48)
CKD 4 (Severe) 0.0 ( 0)
CKD 5 (Failure) 0.6 ( 1)
<NA> 3.7 ( 6)
BMI_WHO % (freq) No data available/missing 0.0 ( 0)
Underweight 1.2 ( 2)
Normal 32.3 ( 52)
Overweight 49.7 ( 80)
Obese 13.7 ( 22)
<NA> 3.1 ( 5)
SmokerStatus % (freq) Current smoker 29.2 ( 47)
Ex-smoker 56.5 ( 91)
Never smoked 11.8 ( 19)
<NA> 2.5 ( 4)
AlcoholUse % (freq) No 38.5 ( 62)
Yes 59.6 ( 96)
<NA> 1.9 ( 3)
DiabetesStatus % (freq) Control (no Diabetes Dx/Med) 78.3 (126)
Diabetes 21.7 ( 35)
Hypertension.selfreport % (freq) No data available/missing 0.0 ( 0)
no 25.5 ( 41)
yes 73.9 (119)
<NA> 0.6 ( 1)
Hypertension.selfreportdrug % (freq) No data available/missing 0.0 ( 0)
no 32.3 ( 52)
yes 66.5 (107)
<NA> 1.2 ( 2)
Hypertension.composite % (freq) No data available/missing 0.0 ( 0)
no 11.2 ( 18)
yes 88.8 (143)
Hypertension.drugs % (freq) No data available/missing 0.0 ( 0)
no 15.5 ( 25)
yes 83.9 (135)
<NA> 0.6 ( 1)
Med.anticoagulants % (freq) No data available/missing 0.0 ( 0)
no 89.4 (144)
yes 9.9 ( 16)
<NA> 0.6 ( 1)
Med.all.antiplatelet % (freq) No data available/missing 0.0 ( 0)
no 6.2 ( 10)
yes 92.5 (149)
<NA> 1.2 ( 2)
Med.Statin.LLD % (freq) No data available/missing 0.0 ( 0)
no 17.4 ( 28)
yes 82.0 (132)
<NA> 0.6 ( 1)
Stroke_Dx % (freq) Missing 0.0 ( 0)
No stroke diagnosed 80.1 (129)
Stroke diagnosed 13.7 ( 22)
<NA> 6.2 ( 10)
sympt % (freq) missing 0.0 ( 0)
Asymptomatic 100.0 (161)
TIA 0.0 ( 0)
minor stroke 0.0 ( 0)
Major stroke 0.0 ( 0)
Amaurosis fugax 0.0 ( 0)
Four vessel disease 0.0 ( 0)
Vertebrobasilary TIA 0.0 ( 0)
Retinal infarction 0.0 ( 0)
Symptomatic, but aspecific symtoms 0.0 ( 0)
Contralateral symptomatic occlusion 0.0 ( 0)
retinal infarction 0.0 ( 0)
armclaudication due to occlusion subclavian artery, CEA needed for bypass 0.0 ( 0)
retinal infarction + TIAs 0.0 ( 0)
Ocular ischemic syndrome 0.0 ( 0)
ischemisch glaucoom 0.0 ( 0)
subclavian steal syndrome 0.0 ( 0)
TGA 0.0 ( 0)
Symptoms.5G % (freq) Asymptomatic 100.0 (161)
Ocular 0.0 ( 0)
Other 0.0 ( 0)
Retinal infarction 0.0 ( 0)
Stroke 0.0 ( 0)
TIA 0.0 ( 0)
AsymptSympt % (freq) Asymptomatic 100.0 (161)
Ocular and others 0.0 ( 0)
Symptomatic 0.0 ( 0)
AsymptSympt2G % (freq) Asymptomatic 100.0 (161)
Symptomatic 0.0 ( 0)
restenos % (freq) missing 0.0 ( 0)
de novo 93.2 (150)
restenosis 3.7 ( 6)
stenose bij angioseal na PTCA 0.0 ( 0)
<NA> 3.1 ( 5)
stenose % (freq) missing 0.0 ( 0)
0-49% 0.0 ( 0)
50-70% 2.5 ( 4)
70-90% 50.9 ( 82)
90-99% 42.9 ( 69)
100% (Occlusion) 0.0 ( 0)
NA 0.0 ( 0)
50-99% 0.6 ( 1)
70-99% 0.0 ( 0)
99 0.0 ( 0)
<NA> 3.1 ( 5)
MedHx_CVD % (freq) No 37.3 ( 60)
yes 62.7 (101)
CAD_history % (freq) Missing 0.0 ( 0)
No history CAD 59.0 ( 95)
History CAD 41.0 ( 66)
PAOD % (freq) missing/no data 0.0 ( 0)
no 73.9 (119)
yes 26.1 ( 42)
Peripheral.interv % (freq) no 72.7 (117)
yes 27.3 ( 44)
<NA> 0.0 ( 0)
EP_composite % (freq) No data available. 0.0 ( 0)
No composite endpoints 68.3 (110)
Composite endpoints 31.7 ( 51)
<NA> 0.0 ( 0)
EP_composite_time (mean (SD)) 2.579 (0.961)
macmean0 (mean (SD)) 0.802 (1.072)
smcmean0 (mean (SD)) 2.445 (2.594)
Macrophages.bin % (freq) no/minor 50.3 ( 81)
moderate/heavy 49.1 ( 79)
<NA> 0.6 ( 1)
SMC.bin % (freq) no/minor 21.7 ( 35)
moderate/heavy 77.0 (124)
<NA> 1.2 ( 2)
neutrophils (mean (SD)) 133.447 (437.032)
Mast_cells_plaque (mean (SD)) 123.389 (135.924)
IPH.bin % (freq) no 39.1 ( 63)
yes 60.2 ( 97)
<NA> 0.6 ( 1)
vessel_density_averaged (mean (SD)) 8.837 (6.727)
Stratified by AsymptSympt2G
Symptomatic p test Missing
n 1168
Hospital % (freq) 46.8 ( 547) 0.235 0.0
53.2 ( 621)
ORyear % (freq) 0.0 ( 0) NaN 0.0
4.8 ( 56)
10.6 ( 124)
12.2 ( 142)
13.3 ( 155)
9.9 ( 116)
9.6 ( 112)
6.8 ( 79)
7.7 ( 90)
6.9 ( 81)
8.7 ( 102)
7.6 ( 89)
1.8 ( 21)
0.1 ( 1)
0.0 ( 0)
0.0 ( 0)
0.0 ( 0)
0.0 ( 0)
0.0 ( 0)
Age (mean (SD)) 68.788 (9.077) <0.001 0.0
Gender % (freq) 30.4 ( 355) 0.066 0.0
69.6 ( 813)
TC_finalCU (mean (SD)) 184.078 (48.333) 0.322 32.8
LDL_finalCU (mean (SD)) 109.761 (41.318) 0.206 39.9
HDL_finalCU (mean (SD)) 45.803 (18.219) 0.570 36.2
TG_finalCU (mean (SD)) 145.901 (83.176) 0.141 35.7
TC_final (mean (SD)) 4.768 (1.252) 0.322 32.8
LDL_final (mean (SD)) 2.843 (1.070) 0.206 39.9
HDL_final (mean (SD)) 1.186 (0.472) 0.570 36.2
TG_final (mean (SD)) 1.649 (0.940) 0.141 35.7
hsCRP_plasma (mean (SD)) 16.179 (110.739) 0.394 40.6
systolic (mean (SD)) 155.713 (26.406) 0.230 13.5
diastoli (mean (SD)) 82.863 (13.542) 0.097 13.5
GFR_MDRD (mean (SD)) 71.890 (20.127) 0.400 3.5
BMI (mean (SD)) 26.352 (3.765) 0.392 4.4
KDOQI % (freq) 0.0 ( 0) NaN 3.6
17.4 ( 203)
53.3 ( 623)
24.0 ( 280)
1.3 ( 15)
0.4 ( 5)
3.6 ( 42)
BMI_WHO % (freq) 0.0 ( 0) NaN 4.6
0.9 ( 11)
35.5 ( 415)
45.6 ( 533)
13.1 ( 153)
4.8 ( 56)
SmokerStatus % (freq) 36.0 ( 421) 0.070 4.0
45.6 ( 533)
14.1 ( 165)
4.2 ( 49)
AlcoholUse % (freq) 33.6 ( 393) 0.213 3.9
62.2 ( 726)
4.2 ( 49)
DiabetesStatus % (freq) 77.2 ( 902) 0.846 0.0
22.8 ( 266)
Hypertension.selfreport % (freq) 0.0 ( 0) NaN 1.9
26.6 ( 311)
71.3 ( 833)
2.1 ( 24)
Hypertension.selfreportdrug % (freq) 0.0 ( 0) NaN 2.4
33.0 ( 385)
64.5 ( 753)
2.6 ( 30)
Hypertension.composite % (freq) 0.0 ( 0) NaN 0.0
14.1 ( 165)
85.9 (1003)
Hypertension.drugs % (freq) 0.0 ( 0) NaN 0.2
22.8 ( 266)
77.1 ( 900)
0.2 ( 2)
Med.anticoagulants % (freq) 0.0 ( 0) NaN 0.2
87.9 (1027)
11.9 ( 139)
0.2 ( 2)
Med.all.antiplatelet % (freq) 0.0 ( 0) NaN 0.5
10.9 ( 127)
88.8 (1037)
0.3 ( 4)
Med.Statin.LLD % (freq) 0.0 ( 0) NaN 0.2
23.1 ( 270)
76.7 ( 896)
0.2 ( 2)
Stroke_Dx % (freq) 0.0 ( 0) NaN 5.5
75.5 ( 882)
19.1 ( 223)
5.4 ( 63)
sympt % (freq) 0.0 ( 0) NaN 0.0
0.0 ( 0)
46.5 ( 543)
17.1 ( 200)
11.6 ( 136)
17.0 ( 198)
2.1 ( 25)
0.2 ( 2)
1.4 ( 16)
3.1 ( 36)
0.6 ( 7)
0.3 ( 3)
0.1 ( 1)
0.0 ( 0)
0.1 ( 1)
0.0 ( 0)
0.0 ( 0)
0.0 ( 0)
Symptoms.5G % (freq) 0.0 ( 0) <0.001 0.0
17.0 ( 199)
5.9 ( 69)
1.6 ( 19)
28.8 ( 336)
46.7 ( 545)
AsymptSympt % (freq) 0.0 ( 0) <0.001 0.0
24.6 ( 287)
75.4 ( 881)
AsymptSympt2G % (freq) 0.0 ( 0) <0.001 0.0
100.0 (1168)
restenos % (freq) 0.0 ( 0) NaN 2.0
95.0 (1110)
3.1 ( 36)
0.0 ( 0)
1.9 ( 22)
stenose % (freq) 0.0 ( 0) NaN 2.9
0.6 ( 7)
6.2 ( 73)
44.5 ( 520)
43.2 ( 505)
0.9 ( 11)
0.0 ( 0)
0.4 ( 5)
1.2 ( 14)
0.0 ( 0)
2.8 ( 33)
MedHx_CVD % (freq) 36.7 ( 429) 0.964 0.0
63.3 ( 739)
CAD_history % (freq) 0.0 ( 0) NaN 0.0
69.1 ( 807)
30.9 ( 361)
PAOD % (freq) 0.0 ( 0) NaN 0.0
79.9 ( 933)
20.1 ( 235)
Peripheral.interv % (freq) 83.0 ( 969) 0.004 0.2
16.8 ( 196)
0.3 ( 3)
EP_composite % (freq) 0.0 ( 0) NaN 0.8
73.8 ( 862)
25.3 ( 295)
0.9 ( 11)
EP_composite_time (mean (SD)) 2.611 (1.129) 0.735 1.0
macmean0 (mean (SD)) 0.821 (1.274) 0.864 2.2
smcmean0 (mean (SD)) 1.924 (2.232) 0.007 2.5
Macrophages.bin % (freq) 45.8 ( 535) 0.314 1.8
52.2 ( 610)
2.0 ( 23)
SMC.bin % (freq) 32.4 ( 379) 0.018 1.7
65.8 ( 769)
1.7 ( 20)
neutrophils (mean (SD)) 158.140 (448.512) 0.754 81.0
Mast_cells_plaque (mean (SD)) 173.244 (168.601) 0.097 83.7
IPH.bin % (freq) 36.5 ( 426) 0.526 1.5
61.9 ( 723)
1.6 ( 19)
vessel_density_averaged (mean (SD)) 8.434 (6.386) 0.474 8.0
[ reached getOption("max.print") -- omitted 20 rows ]
Writing the baseline table to Excel format.
# Write the baseline tables to Excel workbooks inside BASELINE_loc.
# File names are <Today>.<PROJECTNAME>.AE.BaselineTable.<suffix>.xlsx.
# library() instead of require(): if openxlsx is not installed the script stops
# here with a clear error, rather than continuing and failing inside write.xlsx().
library(openxlsx)

# Overall (unstratified) table of all CEA patients.
write.xlsx(
  x = AEDB.CEA.tableOne,
  file = file.path(BASELINE_loc, paste0(Today, ".", PROJECTNAME, ".AE.BaselineTable.wholeCEA.xlsx")),
  row.names = TRUE,
  col.names = TRUE,
  sheetName = "wholeAEDB_Baseline"
)

# Table stratified by AsymptSympt2G.
# NOTE(review): the file and sheet are labelled "wholeCEA"/"wholeAEDB", but this
# table is built from AEDB.CEA.subset (patients with MCP1_pg_ml_2015 only) -
# confirm the intended label. Names are left unchanged here.
write.xlsx(
  x = AEDB.CEA.subset.AsymptSympt.tableOne,
  file = file.path(BASELINE_loc, paste0(Today, ".", PROJECTNAME, ".AE.BaselineTable.wholeCEA.AsymptSympt.xlsx")),
  row.names = TRUE,
  col.names = TRUE,
  sheetName = "wholeAEDB_Baseline_Sympt"
)

# Stratified table of the patients with MCP1_pg_ml_2015 and/or MCP1.
# NOTE(review): AEDB.tableOne (whole biobank) is not written by this chunk.
write.xlsx(
  x = AEDB.CEA.subset.combo.tableOne,
  file = file.path(BASELINE_loc, paste0(Today, ".", PROJECTNAME, ".AE.BaselineTable.subsetCEA.xlsx")),
  row.names = TRUE,
  col.names = TRUE,
  sheetName = "subsetAEDB_Baseline"
)
Here we inspect the data and when necessary transform quantitative measures. We will inspect the raw, and inverse-rank normal transformation (standardise). We know that the proteins are not normally distributed and therefore we will standardise them as follows:
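$$z = \frac{x - \mu}{\sigma}$$
where, for each sample, $x$ is the value of the variable, $\mu$ (mu) is the mean of $x$, and $\sigma$ (sigma) is the standard deviation of $x$. Note that the code below does not compute this z-score directly: it applies a rank-based inverse-normal transformation, $z_i = \Phi^{-1}\left(\frac{r_i - 0.5}{n}\right)$, where $r_i$ is the rank of sample $i$ among the $n$ non-missing values and $\Phi^{-1}$ is the standard normal quantile function (`qnorm`).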
We will explore the plaque levels. As noted above, we will use MCP1_pg_ml_2015, this was experiment 2 in 2015 on the LUMINEX-platform and measurements are in pg/mL.
summary(AEDB.CEA$MCP1_pg_ml_2015) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.66 101.34 298.76 600.44 770.98 10181.08 1224
do.call(rbind , by(AEDB.CEA$MCP1_pg_ml_2015, AEDB.CEA$AsymptSympt2G, summary)) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
Asymptomatic 9.36 71.1650 152.220 405.1822 537.9100 2669.59 139
Symptomatic 0.66 114.9425 314.625 624.3948 792.4225 10181.08 1085
library(patchwork)
# Histogram of the raw plaque MCP1 levels from the 2015 experiment
# (column MCP1_pg_ml_2015), filled by Gender.
# The x-axis label is "pg/mL": the column is measured in pg/mL, matching the
# accompanying text and the other MCP1 histograms in this section.
p1 <- ggpubr::gghistogram(AEDB.CEA, "MCP1_pg_ml_2015",
                          # y = "..count..",
                          color = "white",
                          fill = "Gender",
                          palette = c("#1290D9", "#DB003F"),
                          # add = "mean",
                          # rug = TRUE,
                          # add.params = list(color = "black", linetype = 2),
                          title = "MCP1 plaque levels",
                          xlab = "pg/mL",
                          ggtheme = theme_minimal())Using `bins = 30` by default. Pick better value with the argument `bins`.
AEDB.CEA$MCP1_pg_ml_2015_rank <- qnorm((rank(AEDB.CEA$MCP1_pg_ml_2015, na.last = "keep") - 0.5) / sum(!is.na(AEDB.CEA$MCP1_pg_ml_2015)))
p3 <- ggpubr::gghistogram(AEDB.CEA, "MCP1_pg_ml_2015_rank",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#1290D9", "#DB003F"),
add = "mean",
# rug = TRUE,
# add.params = list(color = "black", linetype = 2),
title = "MCP1 plaque levels",
xlab = "inverse-normal transformation pg/mL",
ggtheme = theme_minimal())Using `bins = 30` by default. Pick better value with the argument `bins`.
p1 p3# ggpar(p1, legend = "") / ggpar(p2, legend = "") | ggpar(p3, legend = "right")
rm(p1, p3)We will explore the plaque levels. As noted above, we will use MCP1, this was experiment 1 on the LUMINEX-platform and measurements are in pg/mL.
# summary(AEDB.CEA$MCP1)
#
# do.call(rbind , by(AEDB.CEA$MCP1, AEDB.CEA$AsymptSympt2G, summary))
#
# Recode MCP1 readings that are exactly 0 as missing (NA).
# The column is indexed explicitly instead of via attach()/detach(): with
# attach(), a bare `MCP1` is looked up on the search path, so any object named
# `MCP1` in the global environment would silently be used instead of the column.
# which() keeps only the TRUE positions, so rows that are already NA are left
# untouched.
AEDB.CEA$MCP1[which(AEDB.CEA$MCP1 == 0)] <- NA
summary(AEDB.CEA$MCP1) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
3.865 58.057 103.811 137.960 180.297 926.273 1867
do.call(rbind , by(AEDB.CEA$MCP1, AEDB.CEA$AsymptSympt2G, summary)) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
Asymptomatic 15.578813 45.31926 77.84731 119.4878 126.1851 846.5306 184
Symptomatic 3.864774 60.54905 111.87004 141.3406 186.4375 926.2729 1683
p1 <- ggpubr::gghistogram(AEDB.CEA, "MCP1",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#1290D9", "#DB003F"),
# add = "mean",
# rug = TRUE,
# add.params = list(color = "black", linetype = 2),
title = "MCP1 plaque levels",
xlab = "pg/mL",
ggtheme = theme_minimal())Using `bins = 30` by default. Pick better value with the argument `bins`.
AEDB.CEA$MCP1_rank <- qnorm((rank(AEDB.CEA$MCP1, na.last = "keep") - 0.5) / sum(!is.na(AEDB.CEA$MCP1)))
p3 <- ggpubr::gghistogram(AEDB.CEA, "MCP1_rank",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#1290D9", "#DB003F"),
add = "mean",
# rug = TRUE,
# add.params = list(color = "black", linetype = 2),
title = "MCP1 plaque levels",
xlab = "inverse-normal transformation pg/mL",
ggtheme = theme_minimal())Using `bins = 30` by default. Pick better value with the argument `bins`.
p1 p3# ggpar(p1, legend = "") / ggpar(p2, legend = "") | ggpar(p3, legend = "right")
rm(p1, p3)Here we compare the MCP1 plaque levels from experiment 1 with those from experiment 2.
# Scatter plot of the inverse-rank normalised MCP1 levels: experiment 1
# (MCP1_rank) on the x-axis against experiment 2 (MCP1_pg_ml_2015_rank) on the
# y-axis. A dashed black regression line and the Spearman correlation
# coefficient are added.
p1 <- ggpubr::ggscatter(
  data = AEDB.CEA,
  x = "MCP1_rank",
  y = "MCP1_pg_ml_2015_rank",
  title = "MCP1 plaque levels, INT, [pg/mL]",
  xlab = "experiment 1",
  ylab = "experiment 2",
  color = "#1290D9",
  # fill = "Gender",
  # palette = c("#1290D9", "#DB003F"),
  add = "reg.line",
  add.params = list(color = "black", linetype = 2),
  cor.coef = TRUE,
  cor.method = "spearman",
  ggtheme = theme_minimal()
)
p1 Based on the inverse-rank normal transformation we conclude there are no outliers and the data approximates a normal distribution. We will apply inverse-rank normal transformation on all proteins and focus the analysis on MCP1 in plaque.
The analyses are focused on three elements:
[Age], [Gender], [Hypertension.composite], [DiabetesStatus], [SmokerStatus], [LDL_final], [Med.Statin.LLD], [Med.all.antiplatelet], [GFR_MDRD], [BMI], [MedHx_CVD] (a combination of [CAD_history, Stroke_history, Peripheral.interv]), [stenose], and [ORdate_year]: as we discovered in Van Lammeren et al., the composition of the plaque, and therefore of the Athero-Express Biobank Study, has changed over the years, likely through changes in lifestyle and primary prevention regimes. We will analyze the data through four different models
In the cross-sectional analysis of plaque MCP1 levels we will focus on the following plaque vulnerability phenotypes:
We inspect the plaque characteristics, and inverse-rank normal transformation continuous phenotypes.
# macrophages
cat("Summary of data.\n")Summary of data.
summary(AEDB.CEA$macmean0) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0000 0.0733 0.3133 0.7671 0.9967 15.1000 720
min_macmean <- min(AEDB.CEA$macmean0, na.rm = TRUE)
cat(paste0("\nMinimum value % macrophages: ",min_macmean,".\n"))
Minimum value % macrophages: 0.
AEDB.CEA$Macrophages_LN <- log(AEDB.CEA$macmean0 + min_macmean)
ggpubr::gghistogram(AEDB.CEA, "Macrophages_LN",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#1290D9", "#DB003F"),
add = "median",
#add_density = TRUE,
rug = TRUE,
#add.params = list(color = "black", linetype = 2),
title = "% macrophages",
xlab = "natural log-transformed %",
ggtheme = theme_minimal())Using `bins = 30` by default. Pick better value with the argument `bins`.
AEDB.CEA$Macrophages_rank <- qnorm((rank(AEDB.CEA$macmean0, na.last = "keep") - 0.5) / sum(!is.na(AEDB.CEA$macmean0)))
ggpubr::gghistogram(AEDB.CEA, "Macrophages_rank",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#1290D9", "#DB003F"),
add = "median",
#add_density = TRUE,
rug = TRUE,
#add.params = list(color = "black", linetype = 2),
title = "% macrophages",
xlab = "inverse-rank normalized %",
ggtheme = theme_minimal())Using `bins = 30` by default. Pick better value with the argument `bins`.
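# smooth muscle cells
# NOTE(review): the summary() call in this chunk reports macmean0 (macrophages),
# not smcmean0 -- this looks like a copy-paste slip; confirm and re-run.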
cat("Summary of data.\n")Summary of data.
summary(AEDB.CEA$macmean0) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0000 0.0733 0.3133 0.7671 0.9967 15.1000 720
min_smcmean <- min(AEDB.CEA$smcmean0, na.rm = TRUE)
cat(paste0("\nMinimum value % smooth muscle cells: ",min_smcmean,".\n"))
Minimum value % smooth muscle cells: 0.
AEDB.CEA$SMC_LN <- log(AEDB.CEA$smcmean0 + min_smcmean)
ggpubr::gghistogram(AEDB.CEA, "SMC_LN",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#1290D9", "#DB003F"),
add = "median",
#add_density = TRUE,
rug = TRUE,
#add.params = list(color = "black", linetype = 2),
title = "% smooth muscle cells",
xlab = "natural log-transformed %",
ggtheme = theme_minimal())Using `bins = 30` by default. Pick better value with the argument `bins`.
AEDB.CEA$SMC_rank <- qnorm((rank(AEDB.CEA$smcmean0, na.last = "keep") - 0.5) / sum(!is.na(AEDB.CEA$smcmean0)))
ggpubr::gghistogram(AEDB.CEA, "SMC_rank",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#1290D9", "#DB003F"),
add = "median",
#add_density = TRUE,
rug = TRUE,
#add.params = list(color = "black", linetype = 2),
title = "% smooth muscle cells",
xlab = "inverse-rank normalized %",
ggtheme = theme_minimal())Using `bins = 30` by default. Pick better value with the argument `bins`.
# vessel density
cat("Summary of data.\n")Summary of data.
summary(AEDB.CEA$vessel_density_averaged) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.000 4.000 7.000 8.318 11.300 48.000 850
min_vesseldensity <- min(AEDB.CEA$vessel_density_averaged, na.rm = TRUE)
min_vesseldensity[1] 0
cat(paste0("\nMinimum value number of intraplaque neovessels per 3-4 hotspots: ",min_vesseldensity,".\n"))
Minimum value number of intraplaque neovessels per 3-4 hotspots: 0.
AEDB.CEA$VesselDensity_LN <- log(AEDB.CEA$vessel_density_averaged + min_vesseldensity)
ggpubr::gghistogram(AEDB.CEA, "VesselDensity_LN",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#1290D9", "#DB003F"),
add = "median",
#add_density = TRUE,
rug = TRUE,
#add.params = list(color = "black", linetype = 2),
title = "number of intraplaque neovessels per 3-4 hotspots",
xlab = "natural log-transformed number",
ggtheme = theme_minimal())Using `bins = 30` by default. Pick better value with the argument `bins`.
AEDB.CEA$VesselDensity_rank <- qnorm((rank(AEDB.CEA$vessel_density_averaged, na.last = "keep") - 0.5) / sum(!is.na(AEDB.CEA$vessel_density_averaged)))
ggpubr::gghistogram(AEDB.CEA, "VesselDensity_rank",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#1290D9", "#DB003F"),
add = "median",
#add_density = TRUE,
rug = TRUE,
#add.params = list(color = "black", linetype = 2),
title = "number of intraplaque neovessels per 3-4 hotspots",
xlab = "inverse-rank normalized number",
ggtheme = theme_minimal())Using `bins = 30` by default. Pick better value with the argument `bins`.
Given their strong correlation, we also introduce a macrophages/smooth muscle cell ratio. This is a proxy of the extent to which a plaque is inflamed (‘unstable’) as compared to ‘stable’.
AEDB.CEA$MAC_SMC_ratio <- AEDB.CEA$macmean0 / AEDB.CEA$smcmean0
AEDB.CEA$MAC_SMC_ratio_rank <- qnorm((rank(AEDB.CEA$MAC_SMC_ratio, na.last = "keep") - 0.5) / sum(!is.na(AEDB.CEA$MAC_SMC_ratio)))
cat("Summary of data.\n")Summary of data.
summary(AEDB.CEA$Macrophages_rank) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-2.3161 -0.6703 0.0000 0.0020 0.6745 3.4375 720
summary(AEDB.CEA$SMC_rank) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-2.6939 -0.6736 0.0015 0.0006 0.6740 3.4368 724
summary(AEDB.CEA$MAC_SMC_ratio_rank) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-2.3364 -0.6740 0.0000 0.0013 0.6740 2.7533 728
ggpubr::gghistogram(AEDB.CEA, "MAC_SMC_ratio_rank",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#1290D9", "#DB003F"),
add = "median",
#add_density = TRUE,
rug = TRUE,
#add.params = list(color = "black", linetype = 2),
title = "macrophages/smooth muscle cells ratio",
xlab = "inverse-rank normalized",
ggtheme = theme_minimal())Using `bins = 30` by default. Pick better value with the argument `bins`.
# calcification
cat("Summary of data.\n")Summary of data.
summary(AEDB.CEA$Calc.bin) no/minor moderate/heavy NA's
1007 850 566
contrasts(AEDB.CEA$Calc.bin) moderate/heavy
no/minor 0
moderate/heavy 1
AEDB.CEA$CalcificationPlaque <- as.factor(AEDB.CEA$Calc.bin)
df <- AEDB.CEA %>%
filter(!is.na(CalcificationPlaque)) %>%
group_by(Gender, CalcificationPlaque) %>%
summarise(counts = n()) `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
ggpubr::ggbarplot(df, x = "CalcificationPlaque", y = "counts",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#DB003F", "#1290D9"),
label = TRUE, lab.vjust = 2, lab.col = "#FFFFFF",
title = "Calcification",
xlab = "calcification",
ggtheme = theme_minimal())rm(df)
# collagen
cat("Summary of data.\n")Summary of data.
summary(AEDB.CEA$Collagen.bin) no/minor moderate/heavy NA's
382 1469 572
contrasts(AEDB.CEA$Collagen.bin) moderate/heavy
no/minor 0
moderate/heavy 1
AEDB.CEA$CollagenPlaque <- as.factor(AEDB.CEA$Collagen.bin)
df <- AEDB.CEA %>%
filter(!is.na(CollagenPlaque)) %>%
group_by(Gender, CollagenPlaque) %>%
summarise(counts = n()) `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
ggpubr::ggbarplot(df, x = "CollagenPlaque", y = "counts",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#DB003F", "#1290D9"),
label = TRUE, lab.vjust = 2, lab.col = "#FFFFFF",
title = "Collagen",
xlab = "collagen",
ggtheme = theme_minimal())rm(df)
# fat 10%
cat("Summary of data.\n")Summary of data.
summary(AEDB.CEA$Fat.bin_10) <10% >10% NA's
542 1316 565
contrasts(AEDB.CEA$Fat.bin_10) >10%
<10% 0
>10% 1
AEDB.CEA$Fat10Perc <- as.factor(AEDB.CEA$Fat.bin_10)
df <- AEDB.CEA %>%
filter(!is.na(Fat10Perc)) %>%
group_by(Gender, Fat10Perc) %>%
summarise(counts = n()) `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
ggpubr::ggbarplot(df, x = "Fat10Perc", y = "counts",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#DB003F", "#1290D9"),
label = TRUE, lab.vjust = 2, lab.col = "#FFFFFF",
title = "Intraplaque fat",
xlab = "intraplaque fat",
ggtheme = theme_minimal())rm(df)
# macrophages binned
cat("Summary of data.\n")Summary of data.
summary(AEDB.CEA$Macrophages.bin) no/minor moderate/heavy NA's
847 992 584
contrasts(AEDB.CEA$Macrophages.bin) moderate/heavy
no/minor 0
moderate/heavy 1
AEDB.CEA$MAC_binned <- as.factor(AEDB.CEA$Macrophages.bin)
df <- AEDB.CEA %>%
filter(!is.na(MAC_binned)) %>%
group_by(Gender, MAC_binned) %>%
summarise(counts = n()) `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
ggpubr::ggbarplot(df, x = "MAC_binned", y = "counts",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#DB003F", "#1290D9"),
label = TRUE, lab.vjust = 2, lab.col = "#FFFFFF",
title = "Macrophages (binned)",
xlab = "Macrophages",
ggtheme = theme_minimal())rm(df)
# macrophages grouped
cat("Summary of data.\n")Summary of data.
AEDB.CEA$macrophages <- as.factor(AEDB.CEA$macrophages)
summary(AEDB.CEA$macrophages)-888 0 1 2 3 NA's
6 173 674 786 206 578
contrasts(AEDB.CEA$macrophages) 0 1 2 3
-888 0 0 0 0
0 1 0 0 0
1 0 1 0 0
2 0 0 1 0
3 0 0 0 1
AEDB.CEA$MAC_grouped <- as.factor(AEDB.CEA$macrophages)
df <- AEDB.CEA %>%
filter(!is.na(MAC_grouped)) %>%
group_by(Gender, MAC_grouped) %>%
summarise(counts = n()) `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
ggpubr::ggbarplot(df, x = "MAC_grouped", y = "counts",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#DB003F", "#1290D9"),
label = TRUE, lab.vjust = 2, lab.col = "#FFFFFF",
title = "Macrophages (grouped)",
xlab = "Macrophages",
ggtheme = theme_minimal())rm(df)
# SMC binned
cat("Summary of data.\n")Summary of data.
summary(AEDB.CEA$SMC.bin) no/minor moderate/heavy NA's
602 1244 577
contrasts(AEDB.CEA$SMC.bin) moderate/heavy
no/minor 0
moderate/heavy 1
AEDB.CEA$SMC_binned <- as.factor(AEDB.CEA$SMC.bin)
df <- AEDB.CEA %>%
filter(!is.na(SMC_binned)) %>%
group_by(Gender, SMC_binned) %>%
summarise(counts = n()) `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
ggpubr::ggbarplot(df, x = "SMC_binned", y = "counts",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#DB003F", "#1290D9"),
label = TRUE, lab.vjust = 2, lab.col = "#FFFFFF",
title = "SMC (binned)",
xlab = "SMC",
ggtheme = theme_minimal())rm(df)
# SMC grouped
cat("Summary of data.\n")Summary of data.
AEDB.CEA$smc <- as.factor(AEDB.CEA$smc)
summary(AEDB.CEA$smc)-888 0 1 2 3 NA's
4 44 558 908 336 573
contrasts(AEDB.CEA$smc) 0 1 2 3
-888 0 0 0 0
0 1 0 0 0
1 0 1 0 0
2 0 0 1 0
3 0 0 0 1
AEDB.CEA$SMC_grouped <- as.factor(AEDB.CEA$smc)
df <- AEDB.CEA %>%
filter(!is.na(SMC_grouped)) %>%
group_by(Gender, SMC_grouped) %>%
summarise(counts = n()) `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
ggpubr::ggbarplot(df, x = "SMC_grouped", y = "counts",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#DB003F", "#1290D9"),
label = TRUE, lab.vjust = 2, lab.col = "#FFFFFF",
title = "SMC (grouped)",
xlab = "SMC",
ggtheme = theme_minimal())rm(df)
# IPH
cat("Summary of data.\n")Summary of data.
summary(AEDB.CEA$IPH.bin) no yes NA's
746 1108 569
contrasts(AEDB.CEA$IPH.bin) yes
no 0
yes 1
AEDB.CEA$IPH <- as.factor(AEDB.CEA$IPH.bin)
df <- AEDB.CEA %>%
filter(!is.na(IPH)) %>%
group_by(Gender, IPH) %>%
summarise(counts = n()) `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
ggpubr::ggbarplot(df, x = "IPH", y = "counts",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#DB003F", "#1290D9"),
label = TRUE, lab.vjust = 2, lab.col = "#FFFFFF",
title = "Intraplaque hemorrhage",
xlab = "intraplaque hemorrhage",
ggtheme = theme_minimal())rm(df)
# Symptoms
cat("Summary of data.\n")Summary of data.
summary(AEDB.CEA$AsymptSympt) Asymptomatic Ocular and others Symptomatic
270 541 1612
contrasts(AEDB.CEA$AsymptSympt) Ocular and others Symptomatic
Asymptomatic 0 0
Ocular and others 1 0
Symptomatic 0 1
AEDB.CEA$AsymptSympt <- as.factor(AEDB.CEA$AsymptSympt)
df <- AEDB.CEA %>%
filter(!is.na(AsymptSympt)) %>%
group_by(Gender, AsymptSympt) %>%
summarise(counts = n()) `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
ggpubr::ggbarplot(df, x = "AsymptSympt", y = "counts",
# y = "..count..",
color = "white",
fill = "Gender",
palette = c("#DB003F", "#1290D9"),
label = TRUE, lab.vjust = 2, lab.col = "#FFFFFF",
title = "Symptoms",
xlab = "symptoms",
ggtheme = theme_minimal())rm(df)Here we compare the MCP1 plaque levels from experiment 1 with those from experiment 2. The latter we measured in pg/mL and also corrected for the total protein content (pg/ug).
# MCP1 (experiment 1, inverse-rank normalised) against ORyear, with a dashed
# black regression line and the Spearman correlation coefficient.
p1 <- ggpubr::ggscatter(
  data = AEDB.CEA,
  x = "ORyear",
  y = "MCP1_rank",
  title = "MCP1 plaque levels, INT, [pg/mL]",
  xlab = "year of surgery",
  ylab = "experiment 1",
  color = "#1290D9",
  # fill = "Gender",
  # palette = c("#1290D9", "#DB003F"),
  add = "reg.line",
  add.params = list(color = "black", linetype = 2),
  cor.coef = TRUE,
  cor.method = "spearman",
  ggtheme = theme_minimal()
)
p1

# Same plot for the experiment 2 measurement (MCP1_pg_ml_2015_rank).
p2 <- ggpubr::ggscatter(
  data = AEDB.CEA,
  x = "ORyear",
  y = "MCP1_pg_ml_2015_rank",
  title = "MCP1 plaque levels, INT, [pg/mL]",
  xlab = "year of surgery",
  ylab = "experiment 2, [pg/mL]",
  color = "#1290D9",
  # fill = "Gender",
  # palette = c("#1290D9", "#DB003F"),
  add = "reg.line",
  add.params = list(color = "black", linetype = 2),
  cor.coef = TRUE,
  cor.method = "spearman",
  ggtheme = theme_minimal()
)
p2
rm(p1, p2)In this section we make some variables to assist with analysis.
# Total number of rows in AEDB.CEA; GLM.CON() reads this global as the
# denominator when it computes the percentage of missing data.
AEDB.CEA.samplesize <- nrow(AEDB.CEA)
# Column names used by the regression loops.
# Inverse-rank normalised MCP1 measurements.
TRAITS.PROTEIN.RANK <- c(
  "MCP1_pg_ml_2015_rank",
  "MCP1_rank"
)
# Inverse-rank normalised continuous plaque traits.
TRAITS.CON.RANK <- c(
  "Macrophages_rank",
  "SMC_rank",
  "MAC_SMC_ratio_rank",
  "VesselDensity_rank"
)
# Binary (factor) plaque traits.
TRAITS.BIN <- c(
  "CalcificationPlaque",
  "CollagenPlaque",
  "Fat10Perc",
  "IPH",
  "MAC_binned",
  "SMC_binned"
)
# "Hospital",
# "Age", "Gender",
# "TC_final", "LDL_final", "HDL_final", "TG_final",
# "systolic", "diastoli", "GFR_MDRD", "BMI",
# "KDOQI", "BMI_WHO",
# "SmokerCurrent", "eCigarettes", "ePackYearsSmoking",
# "DiabetesStatus", "Hypertension.composite",
# "Hypertension.drugs", "Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD",
# "Stroke_Dx", "sympt", "Symptoms.5G", "restenos",
# "EP_composite", "EP_composite_time",
# "macmean0", "smcmean0", "Macrophages.bin", "SMC.bin",
# "neutrophils", "Mast_cells_plaque",
# "IPH.bin", "vessel_density_averaged",
# "Calc.bin", "Collagen.bin",
# "Fat.bin_10", "Fat.bin_40", "OverallPlaquePhenotype",
# "IL6_pg_ug_2015", "MCP1_pg_ug_2015",
# "QC2018_FILTER", "CHIP", "SAMPLE_TYPE",
# "CAD_history", "Stroke_history", "Peripheral.interv",
# "stenose"
# 1. Age (continuous in 1-year increment). [Age]
# 2. Sex (male vs. female). [Gender]
# 3. Presence of hypertension at baseline (defined either as history of hypertension, SBP ≥140 mm Hg, DBP ≥90 mm Hg, or prescription of antihypertensive medications). [Hypertension.composite]
# 4. Presence of diabetes mellitus at baseline (defined either as a history of diabetes, administration of glucose lowering medication, HbA1c ≥6.5%, fasting glucose ≥126 mg/dl, or random glucose levels ≥200 mg/dl). [DiabetesStatus]
# 5. Smoking (current, ex-, never). [SmokerCurrent]
# 6. LDL-C levels (continuous). [LDL_final]
# 7. Use of lipid-lowering drugs. [Med.Statin.LLD]
# 8. Use of antiplatelet drugs. [Med.all.antiplatelet]
# 9. eGFR (continuous). [GFR_MDRD]
# 10. BMI (continuous). [BMI]
# 11. History of cardiovascular disease (stroke, coronary artery disease, peripheral artery disease). [MedHx_CVD] combination of: [CAD_history, Stroke_history, Peripheral.interv]
# 12. Level of stenosis (50-70% vs. 70-99%). [stenose]
# Models
# Model 1: adjusted for age and sex
# Model 2: adjusted for age, sex, hypertension, diabetes, smoking, LDL-C levels, lipid-lowering drugs, antiplatelet drugs, eGFR, BMI, history of CVD, level of stenosis,
# Derive two numeric encodings of the surgery date column `dateok`.
# ORdate_epoch: as.numeric() of the date itself -- presumably days since
#   1970-01-01 if `dateok` is a Date (the summary printed below ranges from
#   11770 to 18250); TODO confirm the class of `dateok`.
AEDB.CEA$ORdate_epoch <- as.numeric(AEDB.CEA$dateok)
# ORdate_year: calendar year as a number (not a factor). year() is called
#   without a namespace -- presumably data.table::year or lubridate::year;
#   verify which one is attached.
AEDB.CEA$ORdate_year <- as.numeric(year(AEDB.CEA$dateok))
cat("Summary of 'year of surgery' in 'epoch' (); coded as `numeric()`\n")Summary of 'year of surgery' in 'epoch' (); coded as `numeric()`
summary(AEDB.CEA$ORdate_epoch) Min. 1st Qu. Median Mean 3rd Qu. Max.
11770 13132 14518 14567 15860 18250
cat("\nSummary of 'year of surgery' in 'years' (); coded as `factor()`\n")
Summary of 'year of surgery' in 'years' (); coded as `factor()`
table(AEDB.CEA$ORdate_year)
2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019
81 157 190 185 183 152 138 182 159 164 176 149 163 76 85 65 66 52
# Covariate sets for the regression models.
# Model 1: age, sex and year of surgery.
COVARIATES_M1 <- c(
  "Age",
  "Gender",
  "ORdate_year"
)
# COVARIATES_M1 = c("Age", "Gender", "ORdate_epoch")

# Model 2: model 1 plus the clinical covariates.
# NOTE(review): the model description in the comments lists LDL-C under
# model 2, but LDL_final is not part of this vector -- confirm which is intended.
COVARIATES_M2 <- c(
  COVARIATES_M1,
  "Hypertension.composite",
  "DiabetesStatus",
  "SmokerStatus",
  # "SmokerCurrent",
  "Med.Statin.LLD",
  "Med.all.antiplatelet",
  "GFR_MDRD",
  "BMI",
  # "CAD_history", "Stroke_history", "Peripheral.interv",
  "MedHx_CVD",
  "stenose"
)
# COVARIATES_M3 = c(COVARIATES_M2, "LDL_final")
# COVARIATES_M4 = c(COVARIATES_M2, "hsCRP_plasma")In this model we correct for Age, Gender, and year of surgery.
Here we use the inverse-rank normalized data - visually this is more normally distributed.
Analysis of continuous/quantitative plaque traits as a function of plaque MCP1 levels.
# Empty 15-column accumulator; the regression loop appends one row of
# GLM.CON() output per fitted model.
GLM.results <- data.frame(matrix(NA, nrow = 0, ncol = 15))
cat("Running linear regression...\n")Running linear regression...
# Model 1 linear regressions.
# For every MCP1 measure in TRAITS.PROTEIN.RANK (response) and every continuous
# plaque trait in TRAITS.CON.RANK (predictor of interest), fit
#   protein ~ trait + Age + Gender + ORdate_year
# on the complete cases, print the fit, and append the trait coefficient
# (second row of the coefficient table, as extracted by GLM.CON()) to
# GLM.results.
for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
  PROTEIN <- TRAITS.PROTEIN.RANK[protein]
  cat(paste0("\nAnalysis of ",PROTEIN,".\n"))
  for (trait in seq_along(TRAITS.CON.RANK)) {
    TRAIT <- TRAITS.CON.RANK[trait]
    cat(paste0("\n- processing ",TRAIT,"\n\n"))
    # Keep only the columns needed for this model. all_of() makes explicit that
    # the names come from character vectors held outside the data, so a column
    # that happens to be called "PROTEIN" or "TRAIT" can never be picked up.
    # Then drop rows with any missing value and rows with infinite numerics.
    currentDF <- as.data.frame(AEDB.CEA %>%
      dplyr::select(., dplyr::all_of(c(PROTEIN, TRAIT, COVARIATES_M1))) %>%
      dplyr::filter(complete.cases(.))) %>%
      dplyr::filter_if(~is.numeric(.), dplyr::all_vars(!is.infinite(.)))
    # for debug
    # print(DT::datatable(currentDF))
    # print(nrow(currentDF))
    # print(str(currentDF))
    ### model 1: trait adjusted for age, sex and year of surgery
    # The covariates are written out in the formula; keep them in sync with
    # COVARIATES_M1 used in the column selection above.
    fit <- lm(currentDF[,PROTEIN] ~ currentDF[,TRAIT] + Age + Gender + ORdate_year, data = currentDF)
    # The stepwise-AIC model is printed for inspection only; the collected
    # results below are taken from the full model `fit`.
    model_step <- stepAIC(fit, direction = "both", trace = FALSE)
    print(model_step)
    print(summary(fit))
    GLM.results.TEMP <- data.frame(matrix(NA, ncol = 15, nrow = 0))
    GLM.results.TEMP[1,] <- GLM.CON(fit, "AEDB.CEA", PROTEIN, TRAIT, verbose = TRUE)
    GLM.results <- rbind(GLM.results, GLM.results.TEMP)
  }
}
Analysis of MCP1_pg_ml_2015_rank.
- processing Macrophages_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Gender +
ORdate_year, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Gendermale ORdate_year
-254.43741 0.06422 0.32420 0.12666
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-2.98066 -0.58281 -0.01477 0.58485 3.01698
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.535e+02 1.846e+01 -13.732 < 2e-16 ***
currentDF[, TRAIT] 6.552e-02 2.761e-02 2.373 0.0178 *
Age 1.944e-03 2.918e-03 0.666 0.5055
Gendermale 3.236e-01 5.775e-02 5.603 2.63e-08 ***
ORdate_year 1.261e-01 9.207e-03 13.698 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9087 on 1166 degrees of freedom
Multiple R-squared: 0.1648, Adjusted R-squared: 0.162
F-statistic: 57.54 on 4 and 1166 DF, p-value: < 2.2e-16
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' Macrophages_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: Macrophages_rank
Effect size...............: 0.065515
Standard error............: 0.027613
Odds ratio (effect size)..: 1.068
Lower 95% CI..............: 1.011
Upper 95% CI..............: 1.127
T-value...................: 2.372657
P-value...................: 0.01782216
R^2.......................: 0.164841
Adjusted r^2..............: 0.161976
Sample size of AE DB......: 2423
Sample size of model......: 1171
Missing data %............: 51.67148
- processing SMC_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Gender +
ORdate_year, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Gendermale ORdate_year
-232.0776 -0.0943 0.3013 0.1155
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.15076 -0.58587 -0.02393 0.55488 3.09880
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.322e+02 1.827e+01 -12.713 < 2e-16 ***
currentDF[, TRAIT] -9.502e-02 2.851e-02 -3.333 0.000887 ***
Age -4.616e-04 2.944e-03 -0.157 0.875435
Gendermale 3.012e-01 5.795e-02 5.198 2.38e-07 ***
ORdate_year 1.156e-01 9.109e-03 12.694 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.904 on 1162 degrees of freedom
Multiple R-squared: 0.1708, Adjusted R-squared: 0.168
F-statistic: 59.84 on 4 and 1162 DF, p-value: < 2.2e-16
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' SMC_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: SMC_rank
Effect size...............: -0.095019
Standard error............: 0.02851
Odds ratio (effect size)..: 0.909
Lower 95% CI..............: 0.86
Upper 95% CI..............: 0.962
T-value...................: -3.332864
P-value...................: 0.0008866462
R^2.......................: 0.170817
Adjusted r^2..............: 0.167963
Sample size of AE DB......: 2423
Sample size of model......: 1167
Missing data %............: 51.83657
- processing MAC_SMC_ratio_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Gender +
ORdate_year, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Gendermale ORdate_year
-252.3972 0.1248 0.2862 0.1257
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-2.95999 -0.57823 -0.00289 0.55678 3.03063
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.520e+02 1.785e+01 -14.117 < 2e-16 ***
currentDF[, TRAIT] 1.246e-01 2.747e-02 4.537 6.31e-06 ***
Age 7.136e-04 2.899e-03 0.246 0.806
Gendermale 2.861e-01 5.803e-02 4.931 9.38e-07 ***
ORdate_year 1.254e-01 8.904e-03 14.085 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9011 on 1160 degrees of freedom
Multiple R-squared: 0.1775, Adjusted R-squared: 0.1747
F-statistic: 62.59 on 4 and 1160 DF, p-value: < 2.2e-16
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' MAC_SMC_ratio_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: MAC_SMC_ratio_rank
Effect size...............: 0.124615
Standard error............: 0.027468
Odds ratio (effect size)..: 1.133
Lower 95% CI..............: 1.073
Upper 95% CI..............: 1.195
T-value...................: 4.536768
P-value...................: 6.305959e-06
R^2.......................: 0.17752
Adjusted r^2..............: 0.174684
Sample size of AE DB......: 2423
Sample size of model......: 1165
Missing data %............: 51.91911
- processing VesselDensity_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Gender +
ORdate_year, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Gendermale ORdate_year
-228.08372 -0.06221 0.33587 0.11352
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.04767 -0.60662 0.00131 0.57990 3.05690
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.268e+02 1.916e+01 -11.838 < 2e-16 ***
currentDF[, TRAIT] -6.247e-02 2.862e-02 -2.183 0.0293 *
Age 1.921e-03 3.059e-03 0.628 0.5302
Gendermale 3.356e-01 6.036e-02 5.561 3.38e-08 ***
ORdate_year 1.128e-01 9.561e-03 11.801 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9201 on 1090 degrees of freedom
Multiple R-squared: 0.1558, Adjusted R-squared: 0.1527
F-statistic: 50.28 on 4 and 1090 DF, p-value: < 2.2e-16
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' VesselDensity_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: VesselDensity_rank
Effect size...............: -0.062472
Standard error............: 0.02862
Odds ratio (effect size)..: 0.939
Lower 95% CI..............: 0.888
Upper 95% CI..............: 0.994
T-value...................: -2.182858
P-value...................: 0.02925907
R^2.......................: 0.155768
Adjusted r^2..............: 0.15267
Sample size of AE DB......: 2423
Sample size of model......: 1095
Missing data %............: 54.80809
Analysis of MCP1_rank.
- processing Macrophages_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Gender +
ORdate_year, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Gendermale ORdate_year
422.8331 0.1222 0.2600 -0.2111
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.4317 -0.6291 -0.0261 0.6543 2.8355
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 414.987584 75.061156 5.529 4.99e-08 ***
currentDF[, TRAIT] 0.121339 0.038035 3.190 0.0015 **
Age -0.006268 0.004724 -1.327 0.1851
Gendermale 0.263235 0.090556 2.907 0.0038 **
ORdate_year -0.206979 0.037471 -5.524 5.13e-08 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9598 on 550 degrees of freedom
Multiple R-squared: 0.0847, Adjusted R-squared: 0.07804
F-statistic: 12.72 on 4 and 550 DF, p-value: 6.54e-10
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' Macrophages_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: Macrophages_rank
Effect size...............: 0.121339
Standard error............: 0.038035
Odds ratio (effect size)..: 1.129
Lower 95% CI..............: 1.048
Upper 95% CI..............: 1.216
T-value...................: 3.19016
P-value...................: 0.001502979
R^2.......................: 0.084699
Adjusted r^2..............: 0.078042
Sample size of AE DB......: 2423
Sample size of model......: 555
Missing data %............: 77.09451
- processing SMC_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Age Gendermale ORdate_year
485.13156 -0.22645 -0.01251 0.22171 -0.24174
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.2040 -0.6017 -0.0439 0.6538 2.7241
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 485.131555 74.828140 6.483 2.01e-10 ***
currentDF[, TRAIT] -0.226449 0.039572 -5.722 1.73e-08 ***
Age -0.012514 0.004716 -2.654 0.0082 **
Gendermale 0.221712 0.089045 2.490 0.0131 *
ORdate_year -0.241741 0.037348 -6.473 2.14e-10 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9371 on 547 degrees of freedom
Multiple R-squared: 0.1219, Adjusted R-squared: 0.1155
F-statistic: 18.98 on 4 and 547 DF, p-value: 1.253e-14
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' SMC_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: SMC_rank
Effect size...............: -0.226449
Standard error............: 0.039572
Odds ratio (effect size)..: 0.797
Lower 95% CI..............: 0.738
Upper 95% CI..............: 0.862
T-value...................: -5.722399
P-value...................: 1.731767e-08
R^2.......................: 0.121879
Adjusted r^2..............: 0.115458
Sample size of AE DB......: 2423
Sample size of model......: 552
Missing data %............: 77.21832
- processing MAC_SMC_ratio_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Age Gendermale ORdate_year
469.90060 0.22103 -0.01003 0.21847 -0.23424
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.3533 -0.5974 -0.0586 0.6573 2.9781
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 469.90060 73.95935 6.353 4.44e-10 ***
currentDF[, TRAIT] 0.22102 0.03612 6.119 1.80e-09 ***
Age -0.01003 0.00464 -2.162 0.0311 *
Gendermale 0.21847 0.08873 2.462 0.0141 *
ORdate_year -0.23424 0.03692 -6.345 4.68e-10 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.933 on 546 degrees of freedom
Multiple R-squared: 0.1294, Adjusted R-squared: 0.123
F-statistic: 20.29 on 4 and 546 DF, p-value: 1.346e-15
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' MAC_SMC_ratio_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: MAC_SMC_ratio_rank
Effect size...............: 0.221025
Standard error............: 0.036122
Odds ratio (effect size)..: 1.247
Lower 95% CI..............: 1.162
Upper 95% CI..............: 1.339
T-value...................: 6.118872
P-value...................: 1.799303e-09
R^2.......................: 0.129413
Adjusted r^2..............: 0.123035
Sample size of AE DB......: 2423
Sample size of model......: 551
Missing data %............: 77.2596
- processing VesselDensity_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ Gender + ORdate_year, data = currentDF)
Coefficients:
(Intercept) Gendermale ORdate_year
427.9795 0.2941 -0.2137
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.4045 -0.5978 -0.0351 0.6466 2.6590
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 407.491047 77.437980 5.262 2.06e-07 ***
currentDF[, TRAIT] -0.055611 0.050753 -1.096 0.27369
Age -0.006762 0.004796 -1.410 0.15917
Gendermale 0.296448 0.092030 3.221 0.00135 **
ORdate_year -0.203215 0.038660 -5.257 2.12e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9662 on 539 degrees of freedom
Multiple R-squared: 0.07477, Adjusted R-squared: 0.0679
F-statistic: 10.89 on 4 and 539 DF, p-value: 1.697e-08
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' VesselDensity_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: VesselDensity_rank
Effect size...............: -0.055611
Standard error............: 0.050753
Odds ratio (effect size)..: 0.946
Lower 95% CI..............: 0.856
Upper 95% CI..............: 1.045
T-value...................: -1.09571
P-value...................: 0.2736949
R^2.......................: 0.07477
Adjusted r^2..............: 0.067904
Sample size of AE DB......: 2423
Sample size of model......: 544
Missing data %............: 77.54849
cat("Edit the column names...\n")Edit the column names...
# Label the 15 columns of the continuous-trait results table, in the order in
# which GLM.CON assembles its output vector.
names(GLM.results) <- c(
  "Dataset", "Predictor", "Trait",
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "T-value", "P-value",
  "r^2", "r^2_adj",
  "AE_N", "Model_N", "Perc_Miss"
)
cat("Correct the variable types...\n")Correct the variable types...
# Cast every statistic column to numeric in one pass. GLM.CON builds each row
# with c() mixing text labels and numbers, so the values are stored as
# character. local() keeps the helper names out of the global environment.
GLM.results <- local({
  stat_columns <- c(
    "Beta", "s.e.m.", "OR", "low95CI", "up95CI",
    "T-value", "P-value", "r^2", "r^2_adj",
    "AE_N", "Model_N", "Perc_Miss"
  )
  converted <- GLM.results
  converted[stat_columns] <- lapply(converted[stat_columns], as.numeric)
  converted
})
# Save the data
cat("Writing results to Excel-file...\n")Writing results to Excel-file...
### Univariate
# Export the continuous-trait results (model 1) to a date-stamped workbook in
# OUT_loc. openxlsx stays attached because later chunks call write.xlsx()
# unqualified.
library(openxlsx)
write.xlsx(
  x = GLM.results,
  file = paste0(OUT_loc, "/", Today, ".AEDB.CEA.Con.Uni.Protein.PlaquePhenotypes.RANK.MODEL1.xlsx"),
  sheetName = "Con.Uni.PlaquePheno",
  col.names = TRUE,
  row.names = FALSE
)
# Removing intermediates
cat("Removing intermediate files...\n")Removing intermediate files...
rm(TRAIT, trait, currentDF, GLM.results, GLM.results.TEMP, fit, model_step)
Analysis of binary plaque traits as a function of plaque MCP1 levels.
# Model 1, binary plaque traits.
# For every entry of TRAITS.PROTEIN.RANK and every entry of TRAITS.BIN, fit a
# logistic regression of the trait on the protein measure, adjusted for Age,
# Gender and ORdate_year, and append the 16-value summary row returned by
# GLM.BIN to GLM.results.
#
# The loop variables (protein/PROTEIN, trait/TRAIT, currentDF, fit, model_step,
# GLM.results.TEMP) are deliberately left in the global environment: the
# clean-up rm() call after this chunk refers to them by name.
GLM.results <- data.frame(matrix(NA, ncol = 16, nrow = 0))
# seq_along() rather than 1:length(): an empty vector gives zero iterations
# instead of looping over c(1, 0).
for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
  PROTEIN <- TRAITS.PROTEIN.RANK[protein]
  cat(paste0("\nAnalysis of ", PROTEIN, ".\n"))
  for (trait in seq_along(TRAITS.BIN)) {
    TRAIT <- TRAITS.BIN[trait]
    cat(paste0("\n- processing ", TRAIT, "\n\n"))
    # Keep only the columns used by the model, then drop rows with any missing
    # value and rows with an infinite value in a numeric column.
    # all_of() makes it explicit that PROTEIN/TRAIT/COVARIATES_M1 are character
    # vectors of column names, not columns of AEDB.CEA.
    currentDF <- as.data.frame(AEDB.CEA %>%
      dplyr::select(., dplyr::all_of(c(PROTEIN, TRAIT, COVARIATES_M1))) %>%
      filter(complete.cases(.))) %>%
      filter_if(~is.numeric(.), all_vars(!is.infinite(.)))
    # for debug
    # print(DT::datatable(currentDF))
    # print(nrow(currentDF))
    # print(str(currentDF))
    # print(class(currentDF[,TRAIT]))
    # Fit the covariate-adjusted logistic model; the protein term is the
    # second coefficient.
    fit <- glm(as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] + Age + Gender + ORdate_year,
               data = currentDF, family = binomial(link = "logit"))
    # The stepwise-AIC model is printed for inspection only; the statistics
    # collected below come from the full model `fit`.
    model_step <- stepAIC(fit, direction = "both", trace = FALSE)
    print(model_step)
    print(summary(fit))
    GLM.results.TEMP <- data.frame(matrix(NA, ncol = 16, nrow = 0))
    GLM.results.TEMP[1, ] <- GLM.BIN(fit, "AEDB.CEA", PROTEIN, TRAIT, verbose = TRUE)
    GLM.results <- rbind(GLM.results, GLM.results.TEMP)
  }
}
Analysis of MCP1_pg_ml_2015_rank.
- processing CalcificationPlaque
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + ORdate_year, family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age ORdate_year
310.77553 -0.34903 0.02309 -0.15567
Degrees of Freedom: 1180 Total (i.e. Null); 1177 Residual
Null Deviance: 1637
Residual Deviance: 1513 AIC: 1521
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.8216 -1.0490 -0.6315 1.0837 2.0978
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 312.506897 44.478670 7.026 2.13e-12 ***
currentDF[, PROTEIN] -0.340451 0.068997 -4.934 8.04e-07 ***
Age 0.023128 0.006798 3.402 0.000669 ***
Gendermale -0.109930 0.134724 -0.816 0.414521
ORdate_year -0.156493 0.022190 -7.052 1.76e-12 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1636.6 on 1180 degrees of freedom
Residual deviance: 1512.0 on 1176 degrees of freedom
AIC: 1522
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' CalcificationPlaque ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: CalcificationPlaque
Effect size...............: -0.340451
Standard error............: 0.068997
Odds ratio (effect size)..: 0.711
Lower 95% CI..............: 0.621
Upper 95% CI..............: 0.814
Z-value...................: -4.934311
P-value...................: 8.04341e-07
Hosmer and Lemeshow r^2...: 0.07616
Cox and Snell r^2.........: 0.100162
Nagelkerke's pseudo r^2...: 0.133573
Sample size of AE DB......: 2423
Sample size of model......: 1181
Missing data %............: 51.25877
- processing CollagenPlaque
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN],
family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN]
1.3403 -0.2873
Degrees of Freedom: 1181 Total (i.e. Null); 1180 Residual
Null Deviance: 1217
Residual Deviance: 1202 AIC: 1206
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.2054 0.5389 0.6456 0.7150 1.0194
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -17.700869 50.816422 -0.348 0.727593
currentDF[, PROTEIN] -0.304533 0.079925 -3.810 0.000139 ***
Age 0.004359 0.007869 0.554 0.579607
Gendermale 0.066303 0.158551 0.418 0.675814
ORdate_year 0.009316 0.025343 0.368 0.713171
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1217.1 on 1181 degrees of freedom
Residual deviance: 1200.8 on 1177 degrees of freedom
AIC: 1210.8
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' CollagenPlaque ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: CollagenPlaque
Effect size...............: -0.304533
Standard error............: 0.079925
Odds ratio (effect size)..: 0.737
Lower 95% CI..............: 0.631
Upper 95% CI..............: 0.863
Z-value...................: -3.810259
P-value...................: 0.0001388211
Hosmer and Lemeshow r^2...: 0.013321
Cox and Snell r^2.........: 0.013622
Nagelkerke's pseudo r^2...: 0.021189
Sample size of AE DB......: 2423
Sample size of model......: 1182
Missing data %............: 51.2175
- processing Fat10Perc
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age Gendermale ORdate_year
467.88804 0.44852 0.01646 0.80595 -0.23342
Degrees of Freedom: 1181 Total (i.e. Null); 1177 Residual
Null Deviance: 1390
Residual Deviance: 1258 AIC: 1268
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.6456 -0.9830 0.5993 0.7919 1.6326
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 467.888045 53.325556 8.774 < 2e-16 ***
currentDF[, PROTEIN] 0.448516 0.079848 5.617 1.94e-08 ***
Age 0.016457 0.007424 2.217 0.0266 *
Gendermale 0.805951 0.144389 5.582 2.38e-08 ***
ORdate_year -0.233421 0.026589 -8.779 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1390.3 on 1181 degrees of freedom
Residual deviance: 1258.3 on 1177 degrees of freedom
AIC: 1268.3
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' Fat10Perc ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: Fat10Perc
Effect size...............: 0.448516
Standard error............: 0.079848
Odds ratio (effect size)..: 1.566
Lower 95% CI..............: 1.339
Upper 95% CI..............: 1.831
Z-value...................: 5.617133
P-value...................: 1.941519e-08
Hosmer and Lemeshow r^2...: 0.094988
Cox and Snell r^2.........: 0.105714
Nagelkerke's pseudo r^2...: 0.152862
Sample size of AE DB......: 2423
Sample size of model......: 1182
Missing data %............: 51.2175
- processing IPH
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Gendermale ORdate_year
381.3362 0.1826 0.6048 -0.1900
Degrees of Freedom: 1178 Total (i.e. Null); 1175 Residual
Null Deviance: 1578
Residual Deviance: 1483 AIC: 1491
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.0163 -1.1800 0.7413 0.9609 1.6978
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 386.982789 46.879353 8.255 < 2e-16 ***
currentDF[, PROTEIN] 0.181469 0.069739 2.602 0.00926 **
Age 0.008978 0.006779 1.324 0.18537
Gendermale 0.603961 0.134706 4.484 7.34e-06 ***
ORdate_year -0.193095 0.023380 -8.259 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1578.0 on 1178 degrees of freedom
Residual deviance: 1481.4 on 1174 degrees of freedom
AIC: 1491.4
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' IPH ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: IPH
Effect size...............: 0.181469
Standard error............: 0.069739
Odds ratio (effect size)..: 1.199
Lower 95% CI..............: 1.046
Upper 95% CI..............: 1.375
Z-value...................: 2.602126
P-value...................: 0.009264778
Hosmer and Lemeshow r^2...: 0.061186
Cox and Snell r^2.........: 0.078628
Nagelkerke's pseudo r^2...: 0.106581
Sample size of AE DB......: 2423
Sample size of model......: 1179
Missing data %............: 51.34131
- processing MAC_binned
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Gendermale ORdate_year
261.8955 0.2204 0.5260 -0.1306
Degrees of Freedom: 1175 Total (i.e. Null); 1172 Residual
Null Deviance: 1629
Residual Deviance: 1571 AIC: 1579
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.7993 -1.1493 0.8206 1.0983 1.6704
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 258.120368 44.216573 5.838 5.29e-09 ***
currentDF[, PROTEIN] 0.221792 0.066957 3.312 0.000925 ***
Age -0.006995 0.006559 -1.066 0.286220
Gendermale 0.528275 0.131558 4.016 5.93e-05 ***
ORdate_year -0.128524 0.022049 -5.829 5.57e-09 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1629.3 on 1175 degrees of freedom
Residual deviance: 1570.2 on 1171 degrees of freedom
AIC: 1580.2
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' MAC_binned ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: MAC_binned
Effect size...............: 0.221792
Standard error............: 0.066957
Odds ratio (effect size)..: 1.248
Lower 95% CI..............: 1.095
Upper 95% CI..............: 1.423
Z-value...................: 3.312458
P-value...................: 0.0009247989
Hosmer and Lemeshow r^2...: 0.036292
Cox and Snell r^2.........: 0.049038
Nagelkerke's pseudo r^2...: 0.065402
Sample size of AE DB......: 2423
Sample size of model......: 1176
Missing data %............: 51.46513
- processing SMC_binned
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender, family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age Gendermale
2.83719 -0.30011 -0.02675 -0.29630
Degrees of Freedom: 1176 Total (i.e. Null); 1173 Residual
Null Deviance: 1470
Residual Deviance: 1425 AIC: 1433
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.9616 -1.3190 0.7505 0.8931 1.3158
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 25.048652 45.710531 0.548 0.583703
currentDF[, PROTEIN] -0.287159 0.071979 -3.989 6.62e-05 ***
Age -0.026390 0.007197 -3.667 0.000246 ***
Gendermale -0.299149 0.144473 -2.071 0.038395 *
ORdate_year -0.011078 0.022797 -0.486 0.626996
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1470.1 on 1176 degrees of freedom
Residual deviance: 1425.2 on 1172 degrees of freedom
AIC: 1435.2
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' SMC_binned ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: SMC_binned
Effect size...............: -0.287159
Standard error............: 0.071979
Odds ratio (effect size)..: 0.75
Lower 95% CI..............: 0.652
Upper 95% CI..............: 0.864
Z-value...................: -3.989465
P-value...................: 6.622254e-05
Hosmer and Lemeshow r^2...: 0.030554
Cox and Snell r^2.........: 0.037444
Nagelkerke's pseudo r^2...: 0.0525
Sample size of AE DB......: 2423
Sample size of model......: 1177
Missing data %............: 51.42386
Analysis of MCP1_rank.
- processing CalcificationPlaque
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) ORdate_year
-451.4488 0.2255
Degrees of Freedom: 555 Total (i.e. Null); 554 Residual
Null Deviance: 749.7
Residual Deviance: 741.7 AIC: 745.7
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.6532 -1.2833 0.8799 1.0256 1.3391
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -404.18119 165.52876 -2.442 0.0146 *
currentDF[, PROTEIN] -0.09186 0.09068 -1.013 0.3110
Age 0.01195 0.01016 1.176 0.2397
Gendermale -0.15600 0.19729 -0.791 0.4291
ORdate_year 0.20155 0.08263 2.439 0.0147 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 749.67 on 555 degrees of freedom
Residual deviance: 738.30 on 551 degrees of freedom
AIC: 748.3
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' CalcificationPlaque ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: CalcificationPlaque
Effect size...............: -0.091864
Standard error............: 0.090683
Odds ratio (effect size)..: 0.912
Lower 95% CI..............: 0.764
Upper 95% CI..............: 1.09
Z-value...................: -1.013025
P-value...................: 0.3110484
Hosmer and Lemeshow r^2...: 0.015167
Cox and Snell r^2.........: 0.020242
Nagelkerke's pseudo r^2...: 0.027342
Sample size of AE DB......: 2423
Sample size of model......: 556
Missing data %............: 77.05324
- processing CollagenPlaque
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
ORdate_year, family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] ORdate_year
-780.9134 -0.4799 0.3905
Degrees of Freedom: 553 Total (i.e. Null); 551 Residual
Null Deviance: 538
Residual Deviance: 498 AIC: 504
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3527 0.3688 0.5145 0.6775 1.4039
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -801.64188 214.85023 -3.731 0.000191 ***
currentDF[, PROTEIN] -0.48686 0.12206 -3.989 6.65e-05 ***
Age -0.01852 0.01355 -1.367 0.171532
Gendermale -0.14801 0.26231 -0.564 0.572575
ORdate_year 0.40152 0.10726 3.743 0.000182 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 537.98 on 553 degrees of freedom
Residual deviance: 495.64 on 549 degrees of freedom
AIC: 505.64
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' CollagenPlaque ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: CollagenPlaque
Effect size...............: -0.486864
Standard error............: 0.122064
Odds ratio (effect size)..: 0.615
Lower 95% CI..............: 0.484
Upper 95% CI..............: 0.781
Z-value...................: -3.988597
P-value...................: 6.646533e-05
Hosmer and Lemeshow r^2...: 0.0787
Cox and Snell r^2.........: 0.073577
Nagelkerke's pseudo r^2...: 0.118419
Sample size of AE DB......: 2423
Sample size of model......: 554
Missing data %............: 77.13578
- processing Fat10Perc
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Gender, family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Gendermale
1.2197 0.6668 0.5508
Degrees of Freedom: 555 Total (i.e. Null); 553 Residual
Null Deviance: 538.8
Residual Deviance: 497.2 AIC: 503.2
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.4515 0.3661 0.5131 0.6573 1.4042
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.478e+02 2.145e+02 -1.156 0.248
currentDF[, PROTEIN] 6.953e-01 1.238e-01 5.617 1.94e-08 ***
Age 4.338e-03 1.299e-02 0.334 0.738
Gendermale 5.263e-01 2.365e-01 2.226 0.026 *
ORdate_year 1.242e-01 1.071e-01 1.160 0.246
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 538.82 on 555 degrees of freedom
Residual deviance: 495.66 on 551 degrees of freedom
AIC: 505.66
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' Fat10Perc ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: Fat10Perc
Effect size...............: 0.695328
Standard error............: 0.123786
Odds ratio (effect size)..: 2.004
Lower 95% CI..............: 1.573
Upper 95% CI..............: 2.555
Z-value...................: 5.617164
P-value...................: 1.941174e-08
Hosmer and Lemeshow r^2...: 0.080102
Cox and Snell r^2.........: 0.07469
Nagelkerke's pseudo r^2...: 0.120356
Sample size of AE DB......: 2423
Sample size of model......: 556
Missing data %............: 77.05324
- processing IPH
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ Age + Gender, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) Age Gendermale
-0.76646 0.02073 0.78990
Degrees of Freedom: 555 Total (i.e. Null); 553 Residual
Null Deviance: 611.8
Residual Deviance: 594.4 AIC: 600.4
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.0029 0.5582 0.6468 0.7206 1.1969
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 15.339755 191.148455 0.080 0.936038
currentDF[, PROTEIN] 0.064053 0.104480 0.613 0.539831
Age 0.021375 0.011622 1.839 0.065905 .
Gendermale 0.774693 0.212151 3.652 0.000261 ***
ORdate_year -0.008053 0.095422 -0.084 0.932743
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 611.78 on 555 degrees of freedom
Residual deviance: 593.99 on 551 degrees of freedom
AIC: 603.99
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' IPH ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: IPH
Effect size...............: 0.064053
Standard error............: 0.10448
Odds ratio (effect size)..: 1.066
Lower 95% CI..............: 0.869
Upper 95% CI..............: 1.308
Z-value...................: 0.613068
P-value...................: 0.5398311
Hosmer and Lemeshow r^2...: 0.029089
Cox and Snell r^2.........: 0.031501
Nagelkerke's pseudo r^2...: 0.047211
Sample size of AE DB......: 2423
Sample size of model......: 556
Missing data %............: 77.05324
- processing MAC_binned
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Gendermale ORdate_year
-823.9069 0.3857 0.3390 0.4112
Degrees of Freedom: 551 Total (i.e. Null); 548 Residual
Null Deviance: 749.1
Residual Deviance: 711.3 AIC: 719.3
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.9353 -1.1973 0.7687 1.0163 1.6249
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -839.30600 175.54024 -4.781 1.74e-06 ***
currentDF[, PROTEIN] 0.37956 0.09495 3.998 6.40e-05 ***
Age -0.01358 0.01043 -1.302 0.1928
Gendermale 0.34867 0.19858 1.756 0.0791 .
ORdate_year 0.41935 0.08763 4.785 1.71e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 749.15 on 551 degrees of freedom
Residual deviance: 709.62 on 547 degrees of freedom
AIC: 719.62
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' MAC_binned ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: MAC_binned
Effect size...............: 0.379557
Standard error............: 0.094948
Odds ratio (effect size)..: 1.462
Lower 95% CI..............: 1.213
Upper 95% CI..............: 1.761
Z-value...................: 3.997506
P-value...................: 6.401333e-05
Hosmer and Lemeshow r^2...: 0.052768
Cox and Snell r^2.........: 0.06911
Nagelkerke's pseudo r^2...: 0.093064
Sample size of AE DB......: 2423
Sample size of model......: 552
Missing data %............: 77.21832
- processing SMC_binned
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age Gendermale ORdate_year
-331.25779 -0.44085 -0.03875 -0.59679 0.16731
Degrees of Freedom: 552 Total (i.e. Null); 548 Residual
Null Deviance: 667.1
Residual Deviance: 622.7 AIC: 632.7
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.1704 -1.2187 0.6567 0.8334 1.4336
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -331.25779 184.63912 -1.794 0.07280 .
currentDF[, PROTEIN] -0.44085 0.10541 -4.182 2.88e-05 ***
Age -0.03875 0.01183 -3.276 0.00105 **
Gendermale -0.59679 0.23370 -2.554 0.01066 *
ORdate_year 0.16731 0.09218 1.815 0.06952 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 667.10 on 552 degrees of freedom
Residual deviance: 622.68 on 548 degrees of freedom
AIC: 632.68
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' SMC_binned ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: SMC_binned
Effect size...............: -0.440853
Standard error............: 0.105406
Odds ratio (effect size)..: 0.643
Lower 95% CI..............: 0.523
Upper 95% CI..............: 0.791
Z-value...................: -4.182437
P-value...................: 2.88401e-05
Hosmer and Lemeshow r^2...: 0.066596
Cox and Snell r^2.........: 0.077195
Nagelkerke's pseudo r^2...: 0.110168
Sample size of AE DB......: 2423
Sample size of model......: 553
Missing data %............: 77.17705
cat("Edit the column names...\n")Edit the column names...
# Label the 16 columns of the binary-trait results table (three pseudo-r^2
# columns instead of the two r^2 columns used for the continuous traits).
names(GLM.results) <- c(
  "Dataset", "Predictor", "Trait",
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "Z-value", "P-value",
  "r^2_l", "r^2_cs", "r^2_nagelkerke",
  "AE_N", "Model_N", "Perc_Miss"
)
cat("Correct the variable types...\n")Correct the variable types...
# Cast every statistic column of the binary-trait results to numeric in one
# pass. local() keeps the helper names out of the global environment.
GLM.results <- local({
  stat_columns <- c(
    "Beta", "s.e.m.", "OR", "low95CI", "up95CI",
    "Z-value", "P-value",
    "r^2_l", "r^2_cs", "r^2_nagelkerke",
    "AE_N", "Model_N", "Perc_Miss"
  )
  converted <- GLM.results
  converted[stat_columns] <- lapply(converted[stat_columns], as.numeric)
  converted
})
# Save the data
cat("Writing results to Excel-file...\n")Writing results to Excel-file...
### Univariate
# Export the binary-trait results (model 1) to a date-stamped workbook in
# OUT_loc.
write.xlsx(
  x = GLM.results,
  file = paste0(OUT_loc, "/", Today, ".AEDB.CEA.Bin.Uni.Protein.PlaquePhenotypes.RANK.MODEL1.xlsx"),
  sheetName = "Bin.Uni.PlaquePheno",
  col.names = TRUE,
  row.names = FALSE
)
# Removing intermediates
cat("Removing intermediate files...\n")Removing intermediate files...
rm(TRAIT, trait, currentDF, GLM.results, GLM.results.TEMP, fit, model_step)
In this model we correct for Age, Gender, year of surgery, Hypertension status, Diabetes status, current smoker status, lipid-lowering drugs (LLDs), antiplatelet medication, eGFR (MDRD), BMI, MedHx_CVD (combination of CAD history, stroke history, and peripheral interventions), and stenosis.
Here we use the inverse-rank normalized data, which is visually closer to a normal distribution.
Analysis of continuous/quantitative plaque traits as a function of plaque MCP1 levels.
# Empty 15-column accumulator; the regression loop that follows appends one
# row per protein/trait pair (GLM.CON returns 15 values).
GLM.results <- data.frame(matrix(NA, ncol = 15, nrow = 0))
cat("Running linear regression...\n")Running linear regression...
# Model 2, continuous plaque traits.
# For every entry of TRAITS.PROTEIN.RANK and every entry of TRAITS.CON.RANK,
# fit a linear model adjusted for the model-2 covariates and append the
# 15-value summary row returned by GLM.CON to GLM.results.
#
# The loop variables (protein/PROTEIN, trait/TRAIT, currentDF, fit, model_step,
# GLM.results.TEMP) are deliberately left in the global environment: the
# clean-up rm() call after this chunk refers to them by name.
#
# seq_along() rather than 1:length(): an empty vector gives zero iterations
# instead of looping over c(1, 0).
for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
  PROTEIN <- TRAITS.PROTEIN.RANK[protein]
  cat(paste0("\nAnalysis of ", PROTEIN, ".\n"))
  for (trait in seq_along(TRAITS.CON.RANK)) {
    TRAIT <- TRAITS.CON.RANK[trait]
    cat(paste0("\n- processing ", TRAIT, "\n\n"))
    # Keep only the columns used by the model, then drop rows with any missing
    # value and rows with an infinite value in a numeric column.
    # all_of() makes it explicit that PROTEIN/TRAIT/COVARIATES_M2 are character
    # vectors of column names, not columns of AEDB.CEA.
    currentDF <- as.data.frame(AEDB.CEA %>%
      dplyr::select(., dplyr::all_of(c(PROTEIN, TRAIT, COVARIATES_M2))) %>%
      filter(complete.cases(.))) %>%
      filter_if(~is.numeric(.), all_vars(!is.infinite(.)))
    # for debug
    # print(DT::datatable(currentDF))
    # print(nrow(currentDF))
    # print(str(currentDF))
    # Fit the covariate-adjusted linear model.
    # NOTE(review): the response here is the protein and the trait is the
    # second coefficient, whereas GLM.CON is called with x_name = PROTEIN and
    # y = TRAIT, and the accompanying text describes traits "as a function of"
    # MCP1. Confirm that this direction is intended.
    fit <- lm(currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age + Gender + ORdate_year +
                Hypertension.composite + DiabetesStatus + SmokerStatus +
                Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI +
                MedHx_CVD + stenose,
              data = currentDF)
    # The stepwise-AIC model is printed for inspection only; the statistics
    # collected below come from the full model `fit`.
    model_step <- stepAIC(fit, direction = "both", trace = FALSE)
    print(model_step)
    print(summary(fit))
    GLM.results.TEMP <- data.frame(matrix(NA, ncol = 15, nrow = 0))
    GLM.results.TEMP[1, ] <- GLM.CON(fit, "AEDB.CEA", PROTEIN, TRAIT, verbose = TRUE)
    GLM.results <- rbind(GLM.results, GLM.results.TEMP)
  }
}
Analysis of MCP1_pg_ml_2015_rank.
- processing Macrophages_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Gender +
ORdate_year + Hypertension.composite + Med.Statin.LLD, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Gendermale ORdate_year Hypertension.compositeyes
-260.49868 0.06069 0.29422 0.12984 -0.13333
Med.Statin.LLDyes
-0.21052
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-2.90020 -0.57577 -0.02735 0.60041 3.00874
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.635e+02 2.097e+01 -12.563 < 2e-16 ***
currentDF[, TRAIT] 5.919e-02 2.995e-02 1.977 0.04835 *
Age 1.825e-03 3.597e-03 0.507 0.61205
Gendermale 3.196e-01 6.437e-02 4.964 8.1e-07 ***
ORdate_year 1.313e-01 1.046e-02 12.553 < 2e-16 ***
Hypertension.compositeyes -1.348e-01 8.759e-02 -1.539 0.12424
DiabetesStatusDiabetes -4.051e-02 7.024e-02 -0.577 0.56426
SmokerStatusEx-smoker -5.669e-02 6.636e-02 -0.854 0.39319
SmokerStatusNever smoked 2.783e-02 9.370e-02 0.297 0.76651
Med.Statin.LLDyes -2.103e-01 7.096e-02 -2.964 0.00311 **
Med.all.antiplateletyes 6.384e-02 9.880e-02 0.646 0.51833
GFR_MDRD -4.344e-04 1.529e-03 -0.284 0.77647
BMI -2.830e-03 8.022e-03 -0.353 0.72432
MedHx_CVDyes 5.212e-03 6.028e-02 0.086 0.93111
stenose50-70% -1.761e-01 3.922e-01 -0.449 0.65353
stenose70-90% 4.823e-03 3.765e-01 0.013 0.98978
stenose90-99% -3.883e-02 3.770e-01 -0.103 0.91800
stenose100% (Occlusion) -2.378e-01 4.841e-01 -0.491 0.62344
stenose50-99% -4.599e-01 5.907e-01 -0.779 0.43637
stenose70-99% -3.348e-01 5.299e-01 -0.632 0.52770
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.911 on 1001 degrees of freedom
Multiple R-squared: 0.1751, Adjusted R-squared: 0.1595
F-statistic: 11.19 on 19 and 1001 DF, p-value: < 2.2e-16
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' Macrophages_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: Macrophages_rank
Effect size...............: 0.059193
Standard error............: 0.029945
Odds ratio (effect size)..: 1.061
Lower 95% CI..............: 1.001
Upper 95% CI..............: 1.125
T-value...................: 1.976704
P-value...................: 0.04834925
R^2.......................: 0.175135
Adjusted r^2..............: 0.159478
Sample size of AE DB......: 2423
Sample size of model......: 1021
Missing data %............: 57.86215
- processing SMC_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Gender +
ORdate_year + Hypertension.composite + Med.Statin.LLD, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Gendermale ORdate_year Hypertension.compositeyes
-235.47131 -0.09684 0.26854 0.11737 -0.13276
Med.Statin.LLDyes
-0.19071
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.06717 -0.59118 -0.01529 0.56640 3.08389
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.401e+02 2.070e+01 -11.601 < 2e-16 ***
currentDF[, TRAIT] -9.359e-02 3.061e-02 -3.058 0.00229 **
Age -8.914e-05 3.618e-03 -0.025 0.98035
Gendermale 2.930e-01 6.519e-02 4.494 7.8e-06 ***
ORdate_year 1.197e-01 1.032e-02 11.598 < 2e-16 ***
Hypertension.compositeyes -1.246e-01 8.751e-02 -1.424 0.15478
DiabetesStatusDiabetes -4.013e-02 7.016e-02 -0.572 0.56745
SmokerStatusEx-smoker -5.388e-02 6.638e-02 -0.812 0.41716
SmokerStatusNever smoked 2.007e-02 9.357e-02 0.214 0.83020
Med.Statin.LLDyes -1.952e-01 7.081e-02 -2.756 0.00595 **
Med.all.antiplateletyes 4.912e-02 9.867e-02 0.498 0.61872
GFR_MDRD -1.574e-04 1.530e-03 -0.103 0.91809
BMI -3.457e-03 8.022e-03 -0.431 0.66658
MedHx_CVDyes 2.805e-03 6.029e-02 0.047 0.96290
stenose50-70% -1.347e-01 3.917e-01 -0.344 0.73094
stenose70-90% 5.330e-02 3.760e-01 0.142 0.88731
stenose90-99% 7.963e-03 3.766e-01 0.021 0.98313
stenose100% (Occlusion) -2.181e-01 4.833e-01 -0.451 0.65191
stenose50-99% -3.620e-01 5.902e-01 -0.613 0.53980
stenose70-99% -2.212e-01 5.293e-01 -0.418 0.67615
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9094 on 997 degrees of freedom
Multiple R-squared: 0.1788, Adjusted R-squared: 0.1631
F-statistic: 11.42 on 19 and 997 DF, p-value: < 2.2e-16
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' SMC_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: SMC_rank
Effect size...............: -0.093585
Standard error............: 0.030608
Odds ratio (effect size)..: 0.911
Lower 95% CI..............: 0.858
Upper 95% CI..............: 0.967
T-value...................: -3.057532
P-value...................: 0.002291186
R^2.......................: 0.17877
Adjusted r^2..............: 0.16312
Sample size of AE DB......: 2423
Sample size of model......: 1017
Missing data %............: 58.02724
- processing MAC_SMC_ratio_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Gender +
ORdate_year + Hypertension.composite + Med.Statin.LLD, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Gendermale ORdate_year Hypertension.compositeyes
-257.0356 0.1286 0.2537 0.1281 -0.1341
Med.Statin.LLDyes
-0.2143
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-2.87932 -0.58271 -0.01362 0.55751 3.02886
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.621e+02 2.028e+01 -12.925 < 2e-16 ***
currentDF[, TRAIT] 1.256e-01 2.973e-02 4.227 2.59e-05 ***
Age 1.042e-03 3.587e-03 0.291 0.77144
Gendermale 2.771e-01 6.516e-02 4.252 2.32e-05 ***
ORdate_year 1.306e-01 1.011e-02 12.916 < 2e-16 ***
Hypertension.compositeyes -1.291e-01 8.719e-02 -1.481 0.13889
DiabetesStatusDiabetes -3.981e-02 6.990e-02 -0.570 0.56914
SmokerStatusEx-smoker -5.634e-02 6.623e-02 -0.851 0.39520
SmokerStatusNever smoked 2.452e-03 9.355e-02 0.026 0.97910
Med.Statin.LLDyes -2.159e-01 7.072e-02 -3.053 0.00232 **
Med.all.antiplateletyes 6.327e-02 9.826e-02 0.644 0.51976
GFR_MDRD -2.742e-04 1.523e-03 -0.180 0.85713
BMI -3.173e-03 8.000e-03 -0.397 0.69171
MedHx_CVDyes -4.603e-03 6.019e-02 -0.076 0.93906
stenose50-70% -1.642e-01 3.904e-01 -0.421 0.67409
stenose70-90% 2.382e-02 3.744e-01 0.064 0.94928
stenose90-99% -4.335e-03 3.750e-01 -0.012 0.99078
stenose100% (Occlusion) -1.729e-01 4.818e-01 -0.359 0.71968
stenose50-99% -4.007e-01 5.875e-01 -0.682 0.49537
stenose70-99% -2.931e-01 5.268e-01 -0.556 0.57804
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.906 on 996 degrees of freedom
Multiple R-squared: 0.1857, Adjusted R-squared: 0.1701
F-statistic: 11.95 on 19 and 996 DF, p-value: < 2.2e-16
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' MAC_SMC_ratio_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: MAC_SMC_ratio_rank
Effect size...............: 0.125649
Standard error............: 0.029729
Odds ratio (effect size)..: 1.134
Lower 95% CI..............: 1.07
Upper 95% CI..............: 1.202
T-value...................: 4.226533
P-value...................: 2.591157e-05
R^2.......................: 0.185682
Adjusted r^2..............: 0.170148
Sample size of AE DB......: 2423
Sample size of model......: 1016
Missing data %............: 58.06851
- processing VesselDensity_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Gender +
ORdate_year + Hypertension.composite + Med.Statin.LLD, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Gendermale ORdate_year Hypertension.compositeyes
-232.26066 -0.07525 0.29943 0.11576 -0.12715
Med.Statin.LLDyes
-0.21061
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-2.96576 -0.59822 -0.00485 0.58684 3.01447
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.349e+02 2.150e+01 -10.925 < 2e-16 ***
currentDF[, TRAIT] -7.728e-02 3.096e-02 -2.496 0.01273 *
Age 1.892e-03 3.757e-03 0.504 0.61463
Gendermale 3.218e-01 6.710e-02 4.796 1.88e-06 ***
ORdate_year 1.171e-01 1.073e-02 10.912 < 2e-16 ***
Hypertension.compositeyes -1.342e-01 9.157e-02 -1.466 0.14306
DiabetesStatusDiabetes -5.770e-02 7.505e-02 -0.769 0.44220
SmokerStatusEx-smoker -4.766e-02 6.953e-02 -0.685 0.49321
SmokerStatusNever smoked 7.042e-03 9.807e-02 0.072 0.94278
Med.Statin.LLDyes -2.113e-01 7.346e-02 -2.877 0.00411 **
Med.all.antiplateletyes 7.706e-02 1.049e-01 0.735 0.46265
GFR_MDRD -6.724e-04 1.607e-03 -0.418 0.67581
BMI -2.815e-04 8.387e-03 -0.034 0.97323
MedHx_CVDyes 1.458e-02 6.296e-02 0.232 0.81686
stenose50-70% -2.957e-01 4.327e-01 -0.683 0.49451
stenose70-90% -5.852e-02 4.163e-01 -0.141 0.88824
stenose90-99% -1.188e-01 4.164e-01 -0.285 0.77550
stenose100% (Occlusion) -3.299e-01 5.170e-01 -0.638 0.52349
stenose50-99% -3.890e-01 6.203e-01 -0.627 0.53068
stenose70-99% -5.688e-01 6.212e-01 -0.916 0.36010
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9202 on 933 degrees of freedom
Multiple R-squared: 0.17, Adjusted R-squared: 0.1531
F-statistic: 10.06 on 19 and 933 DF, p-value: < 2.2e-16
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' VesselDensity_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: VesselDensity_rank
Effect size...............: -0.077283
Standard error............: 0.030963
Odds ratio (effect size)..: 0.926
Lower 95% CI..............: 0.871
Upper 95% CI..............: 0.984
T-value...................: -2.496001
P-value...................: 0.01273189
R^2.......................: 0.169991
Adjusted r^2..............: 0.153088
Sample size of AE DB......: 2423
Sample size of model......: 953
Missing data %............: 60.66859
Analysis of MCP1_rank.
- processing Macrophages_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Gender +
ORdate_year + Hypertension.composite, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Gendermale ORdate_year Hypertension.compositeyes
441.5668 0.1036 0.2776 -0.2203 -0.2432
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.3192 -0.6253 0.0206 0.6596 2.6344
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.154e+02 8.330e+01 4.987 8.60e-07 ***
currentDF[, TRAIT] 9.996e-02 4.118e-02 2.427 0.01558 *
Age -9.280e-03 5.771e-03 -1.608 0.10849
Gendermale 3.003e-01 1.002e-01 2.997 0.00287 **
ORdate_year -2.067e-01 4.157e-02 -4.972 9.23e-07 ***
Hypertension.compositeyes -2.382e-01 1.329e-01 -1.791 0.07385 .
DiabetesStatusDiabetes -6.961e-02 1.124e-01 -0.619 0.53601
SmokerStatusEx-smoker 8.372e-02 9.983e-02 0.839 0.40209
SmokerStatusNever smoked 2.684e-01 1.476e-01 1.819 0.06960 .
Med.Statin.LLDyes -1.509e-01 1.035e-01 -1.457 0.14568
Med.all.antiplateletyes 1.368e-01 1.587e-01 0.862 0.38929
GFR_MDRD -1.657e-04 2.489e-03 -0.067 0.94696
BMI -1.297e-02 1.190e-02 -1.090 0.27621
MedHx_CVDyes 2.265e-02 9.344e-02 0.242 0.80855
stenose50-70% -4.499e-01 6.185e-01 -0.727 0.46738
stenose70-90% -2.733e-01 5.744e-01 -0.476 0.63444
stenose90-99% -2.510e-01 5.728e-01 -0.438 0.66144
stenose100% (Occlusion) -9.705e-01 7.264e-01 -1.336 0.18217
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9725 on 479 degrees of freedom
Multiple R-squared: 0.1108, Adjusted R-squared: 0.0792
F-statistic: 3.51 on 17 and 479 DF, p-value: 3.139e-06
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' Macrophages_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: Macrophages_rank
Effect size...............: 0.099963
Standard error............: 0.041184
Odds ratio (effect size)..: 1.105
Lower 95% CI..............: 1.019
Upper 95% CI..............: 1.198
T-value...................: 2.427249
P-value...................: 0.01558152
R^2.......................: 0.110762
Adjusted r^2..............: 0.079203
Sample size of AE DB......: 2423
Sample size of model......: 497
Missing data %............: 79.48824
- processing SMC_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year + Hypertension.composite, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Age Gendermale ORdate_year
511.09348 -0.22506 -0.01132 0.23728 -0.25465
Hypertension.compositeyes
-0.19903
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.1076 -0.6197 -0.0034 0.6938 2.4632
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.972e+02 8.244e+01 6.031 3.26e-09 ***
currentDF[, TRAIT] -2.277e-01 4.248e-02 -5.361 1.29e-07 ***
Age -1.495e-02 5.694e-03 -2.626 0.00891 **
Gendermale 2.430e-01 9.865e-02 2.463 0.01411 *
ORdate_year -2.474e-01 4.114e-02 -6.013 3.63e-09 ***
Hypertension.compositeyes -2.009e-01 1.285e-01 -1.563 0.11877
DiabetesStatusDiabetes -7.173e-02 1.095e-01 -0.655 0.51256
SmokerStatusEx-smoker 1.214e-01 9.718e-02 1.249 0.21228
SmokerStatusNever smoked 2.460e-01 1.435e-01 1.714 0.08712 .
Med.Statin.LLDyes -1.412e-01 1.011e-01 -1.397 0.16307
Med.all.antiplateletyes 1.136e-01 1.545e-01 0.735 0.46259
GFR_MDRD 2.431e-05 2.423e-03 0.010 0.99200
BMI -1.152e-02 1.157e-02 -0.996 0.31990
MedHx_CVDyes 1.966e-02 9.120e-02 0.216 0.82941
stenose50-70% -3.893e-01 6.021e-01 -0.647 0.51816
stenose70-90% -2.757e-01 5.591e-01 -0.493 0.62215
stenose90-99% -2.863e-01 5.573e-01 -0.514 0.60768
stenose100% (Occlusion) -1.135e+00 7.069e-01 -1.605 0.10909
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9465 on 477 degrees of freedom
Multiple R-squared: 0.1508, Adjusted R-squared: 0.1206
F-statistic: 4.983 on 17 and 477 DF, p-value: 5.352e-10
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' SMC_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: SMC_rank
Effect size...............: -0.22772
Standard error............: 0.042481
Odds ratio (effect size)..: 0.796
Lower 95% CI..............: 0.733
Upper 95% CI..............: 0.865
T-value...................: -5.360554
P-value...................: 1.29467e-07
R^2.......................: 0.150815
Adjusted r^2..............: 0.12055
Sample size of AE DB......: 2423
Sample size of model......: 495
Missing data %............: 79.57078
- processing MAC_SMC_ratio_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year + Hypertension.composite + Med.Statin.LLD,
data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Age Gendermale ORdate_year
467.386591 0.217255 -0.009929 0.229963 -0.232837
Hypertension.compositeyes Med.Statin.LLDyes
-0.219314 -0.151354
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.2086 -0.6173 -0.0234 0.6782 2.6562
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.666e+02 8.141e+01 5.732 1.77e-08 ***
currentDF[, TRAIT] 2.178e-01 3.972e-02 5.485 6.72e-08 ***
Age -1.198e-02 5.637e-03 -2.125 0.0341 *
Gendermale 2.435e-01 9.849e-02 2.472 0.0138 *
ORdate_year -2.322e-01 4.063e-02 -5.714 1.94e-08 ***
Hypertension.compositeyes -2.375e-01 1.292e-01 -1.838 0.0666 .
DiabetesStatusDiabetes -6.732e-02 1.092e-01 -0.617 0.5378
SmokerStatusEx-smoker 7.512e-02 9.692e-02 0.775 0.4387
SmokerStatusNever smoked 2.284e-01 1.434e-01 1.593 0.1119
Med.Statin.LLDyes -1.495e-01 1.010e-01 -1.480 0.1394
Med.all.antiplateletyes 1.235e-01 1.541e-01 0.801 0.4235
GFR_MDRD 2.685e-04 2.420e-03 0.111 0.9117
BMI -1.346e-02 1.155e-02 -1.165 0.2445
MedHx_CVDyes -1.570e-02 9.116e-02 -0.172 0.8633
stenose50-70% -3.589e-01 6.008e-01 -0.597 0.5505
stenose70-90% -3.064e-01 5.577e-01 -0.549 0.5830
stenose90-99% -2.365e-01 5.560e-01 -0.425 0.6708
stenose100% (Occlusion) -1.056e+00 7.048e-01 -1.499 0.1346
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.944 on 476 degrees of freedom
Multiple R-squared: 0.155, Adjusted R-squared: 0.1248
F-statistic: 5.135 on 17 and 476 DF, p-value: 2.17e-10
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' MAC_SMC_ratio_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: MAC_SMC_ratio_rank
Effect size...............: 0.217845
Standard error............: 0.039715
Odds ratio (effect size)..: 1.243
Lower 95% CI..............: 1.15
Upper 95% CI..............: 1.344
T-value...................: 5.485155
P-value...................: 6.719875e-08
R^2.......................: 0.154968
Adjusted r^2..............: 0.124788
Sample size of AE DB......: 2423
Sample size of model......: 494
Missing data %............: 79.61205
- processing VesselDensity_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ Gender + ORdate_year, data = currentDF)
Coefficients:
(Intercept) Gendermale ORdate_year
463.7343 0.3103 -0.2315
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.3250 -0.6474 0.0106 0.6218 2.5297
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.218e+02 8.620e+01 4.893 1.37e-06 ***
currentDF[, TRAIT] -4.850e-02 5.471e-02 -0.887 0.3758
Age -9.513e-03 5.860e-03 -1.623 0.1052
Gendermale 3.285e-01 1.016e-01 3.235 0.0013 **
ORdate_year -2.100e-01 4.302e-02 -4.880 1.46e-06 ***
Hypertension.compositeyes -1.645e-01 1.355e-01 -1.214 0.2254
DiabetesStatusDiabetes -3.384e-02 1.148e-01 -0.295 0.7683
SmokerStatusEx-smoker 9.358e-02 1.014e-01 0.923 0.3567
SmokerStatusNever smoked 2.664e-01 1.497e-01 1.780 0.0758 .
Med.Statin.LLDyes -1.516e-01 1.052e-01 -1.442 0.1500
Med.all.antiplateletyes 1.357e-01 1.616e-01 0.840 0.4015
GFR_MDRD 6.123e-04 2.560e-03 0.239 0.8111
BMI -1.108e-02 1.208e-02 -0.917 0.3595
MedHx_CVDyes 3.668e-02 9.504e-02 0.386 0.6997
stenose50-70% -5.382e-01 6.214e-01 -0.866 0.3869
stenose70-90% -2.824e-01 5.775e-01 -0.489 0.6251
stenose90-99% -2.830e-01 5.758e-01 -0.491 0.6233
stenose100% (Occlusion) -1.043e+00 7.302e-01 -1.429 0.1538
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9771 on 469 degrees of freedom
Multiple R-squared: 0.1035, Adjusted R-squared: 0.07105
F-statistic: 3.186 on 17 and 469 DF, p-value: 2.002e-05
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' VesselDensity_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: VesselDensity_rank
Effect size...............: -0.048502
Standard error............: 0.054711
Odds ratio (effect size)..: 0.953
Lower 95% CI..............: 0.856
Upper 95% CI..............: 1.06
T-value...................: -0.886513
P-value...................: 0.3757955
R^2.......................: 0.103542
Adjusted r^2..............: 0.071048
Sample size of AE DB......: 2423
Sample size of model......: 487
Missing data %............: 79.90095
cat("Edit the column names...\n")Edit the column names...
# Label the 15 result columns in the order in which GLM.CON() returns them.
names(GLM.results) <- c(
  "Dataset", "Predictor", "Trait",
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "T-value", "P-value",
  "r^2", "r^2_adj",
  "AE_N", "Model_N", "Perc_Miss"
)
cat("Correct the variable types...\n")Correct the variable types...
# GLM.CON() returns one vector mixing labels and statistics, so every statistic
# arrives as text; convert all statistic columns back to numeric in one pass.
numeric_columns <- c(
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "T-value", "P-value",
  "r^2", "r^2_adj",
  "AE_N", "Model_N", "Perc_Miss"
)
GLM.results[numeric_columns] <- lapply(GLM.results[numeric_columns], as.numeric)
# Do not leave the helper vector behind in the workspace.
rm(numeric_columns)
# Save the data
cat("Writing results to Excel-file...\n")Writing results to Excel-file...
### Multivariable (model 2) results for the continuous plaque traits
library(openxlsx)
write.xlsx(
  GLM.results,
  file = paste0(
    OUT_loc, "/", Today,
    ".AEDB.CEA.Con.Multi.Protein.PlaquePhenotypes.RANK.MODEL2.xlsx"
  ),
  row.names = FALSE,
  col.names = TRUE,
  sheetName = "Con.Multi.PlaquePheno"
)
# Removing intermediates
cat("Removing intermediate files...\n")Removing intermediate files...
rm(TRAIT, trait, currentDF, GLM.results, GLM.results.TEMP, fit, model_step)Analysis of binary plaque traits as a function of plaque MCP1 levels.
# Model 2, binary plaque traits.
# For every protein/trait pair a logistic regression is fitted with the binary
# plaque trait as the response and the protein as the first predictor, adjusted
# for the model 2 covariates. One summary row per fit is appended to
# GLM.results.

# Empty accumulator with 16 columns, matching the 16-column rows that are
# filled from GLM.BIN() below.
GLM.results <- data.frame(matrix(NA, ncol = 16, nrow = 0))
# seq_along() is used instead of 1:length() so that an empty trait or protein
# vector results in zero iterations rather than iterating over c(1, 0).
for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
  PROTEIN <- TRAITS.PROTEIN.RANK[protein]
  cat(paste0("\nAnalysis of ",PROTEIN,".\n"))
  for (trait in seq_along(TRAITS.BIN)) {
    TRAIT <- TRAITS.BIN[trait]
    cat(paste0("\n- processing ",TRAIT,"\n\n"))
    # Keep only the columns needed for this fit, then drop rows with any
    # missing value and rows with an infinite value in a numeric column.
    # all_of() makes explicit that PROTEIN, TRAIT and COVARIATES_M2 hold column
    # names; bare external vectors are ambiguous if the data contains a column
    # with the same name as one of these variables.
    # as.data.frame() is required because the formula below relies on
    # currentDF[, TRAIT] returning a plain vector.
    currentDF <- AEDB.CEA %>%
      dplyr::select(dplyr::all_of(PROTEIN),
                    dplyr::all_of(TRAIT),
                    dplyr::all_of(COVARIATES_M2)) %>%
      dplyr::filter(complete.cases(.)) %>%
      as.data.frame() %>%
      dplyr::filter_if(~is.numeric(.), dplyr::all_vars(!is.infinite(.)))
    # for debug
    # print(DT::datatable(currentDF))
    # print(nrow(currentDF))
    # print(str(currentDF))
    # print(class(currentDF[,TRAIT]))
    ### multivariable (model 2) fit
    # NOTE(review): GLM.BIN() is defined elsewhere; presumably, like GLM.CON(),
    # it reads the second coefficient row, so the protein should stay the first
    # term on the right-hand side of the formula.
    fit <- glm(as.factor(currentDF[,TRAIT]) ~ currentDF[,PROTEIN] + Age + Gender + ORdate_year +
                 Hypertension.composite + DiabetesStatus + SmokerStatus +
                 Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI +
                 MedHx_CVD + stenose,
               data = currentDF, family = binomial(link = "logit"))
    # The stepwise-selected model is printed for inspection only; the reported
    # statistics are taken from the full model `fit`.
    model_step <- stepAIC(fit, direction = "both", trace = FALSE)
    print(model_step)
    print(summary(fit))
    GLM.results.TEMP <- data.frame(matrix(NA, ncol = 16, nrow = 0))
    GLM.results.TEMP[1,] <- GLM.BIN(fit, "AEDB.CEA", PROTEIN, TRAIT, verbose = TRUE)
    GLM.results <- rbind(GLM.results, GLM.results.TEMP)
  }
}
Analysis of MCP1_pg_ml_2015_rank.
- processing CalcificationPlaque
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + ORdate_year + SmokerStatus, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age ORdate_year SmokerStatusEx-smoker
279.63668 -0.39476 0.02869 -0.14023 -0.41699
SmokerStatusNever smoked
-0.46274
Degrees of Freedom: 1025 Total (i.e. Null); 1020 Residual
Null Deviance: 1420
Residual Deviance: 1308 AIC: 1320
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.863 -1.045 -0.608 1.074 2.130
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 257.844061 50.214433 5.135 2.82e-07 ***
currentDF[, PROTEIN] -0.402700 0.075598 -5.327 9.99e-08 ***
Age 0.029437 0.008526 3.453 0.000555 ***
Gendermale -0.003628 0.150954 -0.024 0.980824
ORdate_year -0.129597 0.025058 -5.172 2.32e-07 ***
Hypertension.compositeyes 0.283761 0.205758 1.379 0.167863
DiabetesStatusDiabetes -0.231758 0.164388 -1.410 0.158591
SmokerStatusEx-smoker -0.428649 0.155564 -2.755 0.005861 **
SmokerStatusNever smoked -0.502689 0.218425 -2.301 0.021368 *
Med.Statin.LLDyes -0.024746 0.165034 -0.150 0.880809
Med.all.antiplateletyes -0.052996 0.228424 -0.232 0.816534
GFR_MDRD 0.001570 0.003596 0.437 0.662445
BMI 0.021001 0.018720 1.122 0.261931
MedHx_CVDyes -0.040669 0.140089 -0.290 0.771581
stenose50-70% -0.823178 0.929921 -0.885 0.376042
stenose70-90% -0.353208 0.888418 -0.398 0.690948
stenose90-99% -0.317638 0.889813 -0.357 0.721113
stenose100% (Occlusion) 0.802490 1.222297 0.657 0.511475
stenose50-99% -14.187677 432.155336 -0.033 0.973810
stenose70-99% -0.451708 1.232581 -0.366 0.714012
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1420.3 on 1025 degrees of freedom
Residual deviance: 1294.3 on 1006 degrees of freedom
AIC: 1334.3
Number of Fisher Scoring iterations: 13
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' CalcificationPlaque ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: CalcificationPlaque
Effect size...............: -0.4027
Standard error............: 0.075598
Odds ratio (effect size)..: 0.669
Lower 95% CI..............: 0.576
Upper 95% CI..............: 0.775
Z-value...................: -5.32687
P-value...................: 9.991985e-08
Hosmer and Lemeshow r^2...: 0.088671
Cox and Snell r^2.........: 0.115512
Nagelkerke's pseudo r^2...: 0.154119
Sample size of AE DB......: 2423
Sample size of model......: 1026
Missing data %............: 57.6558
- processing CollagenPlaque
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
ORdate_year + SmokerStatus + BMI + MedHx_CVD, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] ORdate_year SmokerStatusEx-smoker SmokerStatusNever smoked
-82.29156 -0.31120 0.04123 -0.38704 -0.66237
BMI MedHx_CVDyes
0.04011 0.24788
Degrees of Freedom: 1026 Total (i.e. Null); 1020 Residual
Null Deviance: 1049
Residual Deviance: 1021 AIC: 1035
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3321 0.4334 0.6127 0.7234 1.1601
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.689e+01 9.545e+02 -0.039 0.969169
currentDF[, PROTEIN] -3.104e-01 8.728e-02 -3.556 0.000377 ***
Age 1.523e-02 9.880e-03 1.541 0.123267
Gendermale 3.495e-02 1.787e-01 0.196 0.844926
ORdate_year 2.524e-02 2.928e-02 0.862 0.388654
Hypertension.compositeyes 2.493e-01 2.289e-01 1.089 0.276119
DiabetesStatusDiabetes 7.073e-02 1.979e-01 0.358 0.720716
SmokerStatusEx-smoker -4.601e-01 1.902e-01 -2.419 0.015552 *
SmokerStatusNever smoked -7.831e-01 2.492e-01 -3.142 0.001676 **
Med.Statin.LLDyes -8.771e-04 1.935e-01 -0.005 0.996383
Med.all.antiplateletyes 2.678e-01 2.604e-01 1.029 0.303619
GFR_MDRD 5.127e-03 4.253e-03 1.206 0.227994
BMI 4.255e-02 2.333e-02 1.824 0.068186 .
MedHx_CVDyes 2.191e-01 1.637e-01 1.339 0.180703
stenose50-70% -1.490e+01 9.527e+02 -0.016 0.987519
stenose70-90% -1.519e+01 9.527e+02 -0.016 0.987282
stenose90-99% -1.529e+01 9.527e+02 -0.016 0.987198
stenose100% (Occlusion) 3.121e-02 1.235e+03 0.000 0.999980
stenose50-99% -2.727e-01 1.512e+03 0.000 0.999856
stenose70-99% -1.484e+01 9.527e+02 -0.016 0.987573
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1048.5 on 1026 degrees of freedom
Residual deviance: 1006.6 on 1007 degrees of freedom
AIC: 1046.6
Number of Fisher Scoring iterations: 15
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' CollagenPlaque ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: CollagenPlaque
Effect size...............: -0.310382
Standard error............: 0.087283
Odds ratio (effect size)..: 0.733
Lower 95% CI..............: 0.618
Upper 95% CI..............: 0.87
Z-value...................: -3.55603
P-value...................: 0.0003765011
Hosmer and Lemeshow r^2...: 0.040027
Cox and Snell r^2.........: 0.040043
Nagelkerke's pseudo r^2...: 0.06259
Sample size of AE DB......: 2423
Sample size of model......: 1027
Missing data %............: 57.61453
- processing Fat10Perc
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + SmokerStatus, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age Gendermale ORdate_year
469.37418 0.45540 0.01347 0.86049 -0.23404
SmokerStatusEx-smoker SmokerStatusNever smoked
-0.29641 0.29609
Degrees of Freedom: 1026 Total (i.e. Null); 1020 Residual
Null Deviance: 1209
Residual Deviance: 1092 AIC: 1106
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.6326 -0.9680 0.5857 0.7829 1.6994
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 484.672877 355.498773 1.363 0.1728
currentDF[, PROTEIN] 0.447618 0.086377 5.182 2.19e-07 ***
Age 0.015842 0.009283 1.707 0.0879 .
Gendermale 0.870259 0.163696 5.316 1.06e-07 ***
ORdate_year -0.235162 0.030057 -7.824 5.12e-15 ***
Hypertension.compositeyes -0.053017 0.230251 -0.230 0.8179
DiabetesStatusDiabetes -0.183150 0.181689 -1.008 0.3134
SmokerStatusEx-smoker -0.316266 0.174529 -1.812 0.0700 .
SmokerStatusNever smoked 0.288237 0.255876 1.126 0.2600
Med.Statin.LLDyes -0.049076 0.191425 -0.256 0.7977
Med.all.antiplateletyes 0.096033 0.259188 0.371 0.7110
GFR_MDRD 0.001989 0.003985 0.499 0.6176
BMI 0.005657 0.020516 0.276 0.7828
MedHx_CVDyes 0.093233 0.157184 0.593 0.5531
stenose50-70% -13.345325 350.353303 -0.038 0.9696
stenose70-90% -13.480479 350.353191 -0.038 0.9693
stenose90-99% -13.551667 350.353198 -0.039 0.9691
stenose100% (Occlusion) -14.180536 350.353926 -0.040 0.9677
stenose50-99% -14.930710 350.355179 -0.043 0.9660
stenose70-99% -13.822004 350.354155 -0.039 0.9685
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1209.2 on 1026 degrees of freedom
Residual deviance: 1082.9 on 1007 degrees of freedom
AIC: 1122.9
Number of Fisher Scoring iterations: 13
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' Fat10Perc ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: Fat10Perc
Effect size...............: 0.447618
Standard error............: 0.086377
Odds ratio (effect size)..: 1.565
Lower 95% CI..............: 1.321
Upper 95% CI..............: 1.853
Z-value...................: 5.182132
P-value...................: 2.193639e-07
Hosmer and Lemeshow r^2...: 0.104428
Cox and Snell r^2.........: 0.115698
Nagelkerke's pseudo r^2...: 0.167211
Sample size of AE DB......: 2423
Sample size of model......: 1027
Missing data %............: 57.61453
- processing IPH
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Gender + ORdate_year + BMI + MedHx_CVD, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Gendermale ORdate_year BMI MedHx_CVDyes
392.07230 0.18061 0.50160 -0.19581 0.02969 0.39131
Degrees of Freedom: 1024 Total (i.e. Null); 1019 Residual
Null Deviance: 1371
Residual Deviance: 1281 AIC: 1293
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.1701 -1.1386 0.6982 0.9617 1.7551
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 413.430677 53.512385 7.726 1.11e-14 ***
currentDF[, PROTEIN] 0.180008 0.075716 2.377 0.017434 *
Age 0.010033 0.008479 1.183 0.236689
Gendermale 0.545667 0.152016 3.590 0.000331 ***
ORdate_year -0.206531 0.026700 -7.735 1.03e-14 ***
Hypertension.compositeyes -0.112636 0.207286 -0.543 0.586867
DiabetesStatusDiabetes -0.113506 0.165535 -0.686 0.492907
SmokerStatusEx-smoker -0.097149 0.158542 -0.613 0.540032
SmokerStatusNever smoked -0.141723 0.218025 -0.650 0.515672
Med.Statin.LLDyes -0.086084 0.170254 -0.506 0.613122
Med.all.antiplateletyes 0.103732 0.232980 0.445 0.656148
GFR_MDRD -0.002822 0.003613 -0.781 0.434817
BMI 0.036544 0.019035 1.920 0.054876 .
MedHx_CVDyes 0.365201 0.141153 2.587 0.009674 **
stenose50-70% -0.401826 0.943263 -0.426 0.670111
stenose70-90% -0.391222 0.909306 -0.430 0.667020
stenose90-99% -0.317648 0.911397 -0.349 0.727443
stenose100% (Occlusion) -0.745875 1.137193 -0.656 0.511894
stenose50-99% 0.123165 1.360810 0.091 0.927883
stenose70-99% 1.975403 1.425834 1.385 0.165919
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1371.2 on 1024 degrees of freedom
Residual deviance: 1268.6 on 1005 degrees of freedom
AIC: 1308.6
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' IPH ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: IPH
Effect size...............: 0.180008
Standard error............: 0.075716
Odds ratio (effect size)..: 1.197
Lower 95% CI..............: 1.032
Upper 95% CI..............: 1.389
Z-value...................: 2.377427
P-value...................: 0.0174339
Hosmer and Lemeshow r^2...: 0.074795
Cox and Snell r^2.........: 0.095211
Nagelkerke's pseudo r^2...: 0.12909
Sample size of AE DB......: 2423
Sample size of model......: 1025
Missing data %............: 57.69707
- processing MAC_binned
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Gender + ORdate_year + SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet,
family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Gendermale ORdate_year SmokerStatusEx-smoker
275.73528 0.22784 0.50126 -0.13759 0.05857
SmokerStatusNever smoked Med.Statin.LLDyes Med.all.antiplateletyes
0.42075 0.40047 -0.33729
Degrees of Freedom: 1022 Total (i.e. Null); 1015 Residual
Null Deviance: 1417
Residual Deviance: 1359 AIC: 1375
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.903 -1.130 0.754 1.097 1.650
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 296.254795 50.487265 5.868 4.41e-09 ***
currentDF[, PROTEIN] 0.230423 0.072546 3.176 0.001492 **
Age -0.009277 0.008169 -1.136 0.256081
Gendermale 0.507404 0.147625 3.437 0.000588 ***
ORdate_year -0.146960 0.025176 -5.837 5.30e-09 ***
Hypertension.compositeyes -0.018654 0.198510 -0.094 0.925134
DiabetesStatusDiabetes -0.028077 0.159451 -0.176 0.860227
SmokerStatusEx-smoker 0.113143 0.150717 0.751 0.452835
SmokerStatusNever smoked 0.509672 0.214258 2.379 0.017370 *
Med.Statin.LLDyes 0.378901 0.161396 2.348 0.018892 *
Med.all.antiplateletyes -0.419125 0.227897 -1.839 0.065901 .
GFR_MDRD 0.001073 0.003467 0.310 0.756838
BMI -0.016377 0.018281 -0.896 0.370346
MedHx_CVDyes 0.147434 0.136619 1.079 0.280517
stenose50-70% -0.680770 0.923420 -0.737 0.460985
stenose70-90% -0.715597 0.889289 -0.805 0.421002
stenose90-99% -0.798488 0.890683 -0.896 0.369991
stenose100% (Occlusion) -1.742277 1.149497 -1.516 0.129599
stenose50-99% -0.131148 1.354973 -0.097 0.922893
stenose70-99% 0.356396 1.186652 0.300 0.763919
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1417.5 on 1022 degrees of freedom
Residual deviance: 1350.2 on 1003 degrees of freedom
AIC: 1390.2
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' MAC_binned ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: MAC_binned
Effect size...............: 0.230423
Standard error............: 0.072546
Odds ratio (effect size)..: 1.259
Lower 95% CI..............: 1.092
Upper 95% CI..............: 1.452
Z-value...................: 3.176236
P-value...................: 0.001491997
Hosmer and Lemeshow r^2...: 0.047428
Cox and Snell r^2.........: 0.063603
Nagelkerke's pseudo r^2...: 0.084824
Sample size of AE DB......: 2423
Sample size of model......: 1023
Missing data %............: 57.77961
- processing SMC_binned
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + SmokerStatus, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age Gendermale SmokerStatusEx-smoker
2.86041 -0.28454 -0.02413 -0.37184 -0.06239
SmokerStatusNever smoked
-0.42455
Degrees of Freedom: 1022 Total (i.e. Null); 1017 Residual
Null Deviance: 1260
Residual Deviance: 1218 AIC: 1230
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.0646 -1.2798 0.7265 0.8731 1.3692
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -21.133458 52.552076 -0.402 0.687579
currentDF[, PROTEIN] -0.284484 0.078214 -3.637 0.000276 ***
Age -0.021624 0.009007 -2.401 0.016362 *
Gendermale -0.400820 0.164887 -2.431 0.015062 *
ORdate_year 0.011380 0.026223 0.434 0.664310
Hypertension.compositeyes 0.176586 0.212257 0.832 0.405441
DiabetesStatusDiabetes 0.007316 0.171510 0.043 0.965977
SmokerStatusEx-smoker -0.030192 0.165026 -0.183 0.854833
SmokerStatusNever smoked -0.416898 0.222200 -1.876 0.060624 .
Med.Statin.LLDyes 0.020226 0.171804 0.118 0.906284
Med.all.antiplateletyes -0.121723 0.238580 -0.510 0.609914
GFR_MDRD 0.005240 0.003756 1.395 0.163024
BMI -0.002086 0.020132 -0.104 0.917473
MedHx_CVDyes -0.055130 0.148005 -0.372 0.709529
stenose50-70% 0.281200 0.883275 0.318 0.750211
stenose70-90% 0.529893 0.845581 0.627 0.530881
stenose90-99% 0.833372 0.847648 0.983 0.325530
stenose100% (Occlusion) 0.459584 1.110889 0.414 0.679088
stenose50-99% 14.334778 428.022224 0.033 0.973283
stenose70-99% 0.328315 1.165197 0.282 0.778122
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1260.0 on 1022 degrees of freedom
Residual deviance: 1204.9 on 1003 degrees of freedom
AIC: 1244.9
Number of Fisher Scoring iterations: 13
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' SMC_binned ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: SMC_binned
Effect size...............: -0.284484
Standard error............: 0.078214
Odds ratio (effect size)..: 0.752
Lower 95% CI..............: 0.645
Upper 95% CI..............: 0.877
Z-value...................: -3.637273
P-value...................: 0.0002755402
Hosmer and Lemeshow r^2...: 0.043715
Cox and Snell r^2.........: 0.052418
Nagelkerke's pseudo r^2...: 0.074016
Sample size of AE DB......: 2423
Sample size of model......: 1023
Missing data %............: 57.77961
Analysis of MCP1_rank.
- processing CalcificationPlaque
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ ORdate_year + DiabetesStatus +
GFR_MDRD + MedHx_CVD, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) ORdate_year DiabetesStatusDiabetes GFR_MDRD MedHx_CVDyes
-5.235e+02 2.619e-01 -4.535e-01 -9.264e-03 -3.696e-01
Degrees of Freedom: 497 Total (i.e. Null); 493 Residual
Null Deviance: 675.4
Residual Deviance: 656.8 AIC: 666.8
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.7543 -1.2079 0.8134 1.0099 1.6284
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -5.303e+02 1.846e+02 -2.872 0.00408 **
currentDF[, PROTEIN] -9.703e-02 9.764e-02 -0.994 0.32037
Age 1.444e-03 1.244e-02 0.116 0.90761
Gendermale -6.259e-02 2.178e-01 -0.287 0.77380
ORdate_year 2.642e-01 9.214e-02 2.868 0.00414 **
Hypertension.compositeyes 3.948e-01 2.795e-01 1.413 0.15772
DiabetesStatusDiabetes -5.263e-01 2.397e-01 -2.196 0.02808 *
SmokerStatusEx-smoker -1.944e-01 2.133e-01 -0.911 0.36216
SmokerStatusNever smoked -9.473e-02 3.207e-01 -0.295 0.76768
Med.Statin.LLDyes -1.919e-01 2.227e-01 -0.861 0.38897
Med.all.antiplateletyes 2.845e-01 3.414e-01 0.833 0.40477
GFR_MDRD -9.088e-03 5.390e-03 -1.686 0.09180 .
BMI 1.066e-02 2.573e-02 0.414 0.67859
MedHx_CVDyes -3.417e-01 2.018e-01 -1.693 0.09049 .
stenose50-70% 1.416e+00 1.348e+00 1.050 0.29354
stenose70-90% 1.678e+00 1.259e+00 1.333 0.18247
stenose90-99% 1.412e+00 1.254e+00 1.126 0.26026
stenose100% (Occlusion) 1.621e+00 1.604e+00 1.011 0.31222
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 675.45 on 497 degrees of freedom
Residual deviance: 648.38 on 480 degrees of freedom
AIC: 684.38
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' CalcificationPlaque ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: CalcificationPlaque
Effect size...............: -0.097028
Standard error............: 0.097644
Odds ratio (effect size)..: 0.908
Lower 95% CI..............: 0.749
Upper 95% CI..............: 1.099
Z-value...................: -0.993689
P-value...................: 0.3203742
Hosmer and Lemeshow r^2...: 0.04007
Cox and Snell r^2.........: 0.052897
Nagelkerke's pseudo r^2...: 0.071252
Sample size of AE DB......: 2423
Sample size of model......: 498
Missing data %............: 79.44697
- processing CollagenPlaque
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
ORdate_year + SmokerStatus + Med.all.antiplatelet, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] ORdate_year SmokerStatusEx-smoker SmokerStatusNever smoked
-800.8702 -0.5054 0.4003 -0.5831 -0.9310
Med.all.antiplateletyes
0.7596
Degrees of Freedom: 495 Total (i.e. Null); 490 Residual
Null Deviance: 493.1
Residual Deviance: 447.1 AIC: 459.1
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3991 0.2993 0.4921 0.6733 1.3455
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -7.590e+02 8.436e+02 -0.900 0.368269
currentDF[, PROTEIN] -4.958e-01 1.301e-01 -3.812 0.000138 ***
Age -5.619e-03 1.617e-02 -0.347 0.728273
Gendermale -1.393e-01 2.861e-01 -0.487 0.626335
ORdate_year 3.866e-01 1.171e-01 3.302 0.000961 ***
Hypertension.compositeyes 2.724e-01 3.500e-01 0.778 0.436329
DiabetesStatusDiabetes 1.981e-01 3.234e-01 0.612 0.540222
SmokerStatusEx-smoker -5.865e-01 2.850e-01 -2.058 0.039624 *
SmokerStatusNever smoked -9.821e-01 3.912e-01 -2.510 0.012068 *
Med.Statin.LLDyes -9.019e-02 2.759e-01 -0.327 0.743778
Med.all.antiplateletyes 8.581e-01 4.047e-01 2.120 0.033973 *
GFR_MDRD -2.424e-03 7.057e-03 -0.344 0.731196
BMI -9.001e-03 3.494e-02 -0.258 0.796738
MedHx_CVDyes 7.111e-03 2.589e-01 0.027 0.978092
stenose50-70% -1.253e+01 8.103e+02 -0.015 0.987662
stenose70-90% -1.352e+01 8.103e+02 -0.017 0.986683
stenose90-99% -1.400e+01 8.103e+02 -0.017 0.986217
stenose100% (Occlusion) -1.323e+01 8.103e+02 -0.016 0.986975
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 493.05 on 495 degrees of freedom
Residual deviance: 439.04 on 478 degrees of freedom
AIC: 475.04
Number of Fisher Scoring iterations: 14
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' CollagenPlaque ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: CollagenPlaque
Effect size...............: -0.495808
Standard error............: 0.130055
Odds ratio (effect size)..: 0.609
Lower 95% CI..............: 0.472
Upper 95% CI..............: 0.786
Z-value...................: -3.812285
P-value...................: 0.0001376877
Hosmer and Lemeshow r^2...: 0.10955
Cox and Snell r^2.........: 0.103179
Nagelkerke's pseudo r^2...: 0.163795
Sample size of AE DB......: 2423
Sample size of model......: 496
Missing data %............: 79.52951
- processing Fat10Perc
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Gender + Hypertension.composite + SmokerStatus, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Gendermale Hypertension.compositeyes SmokerStatusEx-smoker
0.8092 0.6602 0.6928 0.6592 -0.6083
SmokerStatusNever smoked
0.1413
Degrees of Freedom: 497 Total (i.e. Null); 492 Residual
Null Deviance: 491.1
Residual Deviance: 444.9 AIC: 456.9
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.5848 0.3017 0.4903 0.6709 1.8103
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.019e+02 8.644e+02 -0.349 0.7269
currentDF[, PROTEIN] 6.900e-01 1.329e-01 5.190 2.11e-07 ***
Age 2.784e-03 1.595e-02 0.175 0.8614
Gendermale 6.538e-01 2.651e-01 2.467 0.0136 *
ORdate_year 1.572e-01 1.183e-01 1.328 0.1841
Hypertension.compositeyes 6.489e-01 3.426e-01 1.894 0.0582 .
DiabetesStatusDiabetes -3.041e-01 2.996e-01 -1.015 0.3102
SmokerStatusEx-smoker -6.573e-01 2.800e-01 -2.347 0.0189 *
SmokerStatusNever smoked 4.369e-02 4.552e-01 0.096 0.9235
Med.Statin.LLDyes -2.220e-01 2.967e-01 -0.748 0.4545
Med.all.antiplateletyes 2.654e-01 4.143e-01 0.641 0.5218
GFR_MDRD 1.690e-03 7.138e-03 0.237 0.8128
BMI 3.527e-02 3.295e-02 1.070 0.2844
MedHx_CVDyes 1.223e-01 2.549e-01 0.480 0.6313
stenose50-70% -1.438e+01 8.312e+02 -0.017 0.9862
stenose70-90% -1.327e+01 8.312e+02 -0.016 0.9873
stenose90-99% -1.361e+01 8.312e+02 -0.016 0.9869
stenose100% (Occlusion) -1.294e+01 8.312e+02 -0.016 0.9876
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 491.11 on 497 degrees of freedom
Residual deviance: 435.50 on 480 degrees of freedom
AIC: 471.5
Number of Fisher Scoring iterations: 14
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' Fat10Perc ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: Fat10Perc
Effect size...............: 0.689959
Standard error............: 0.132948
Odds ratio (effect size)..: 1.994
Lower 95% CI..............: 1.536
Upper 95% CI..............: 2.587
Z-value...................: 5.1897
P-value...................: 2.106334e-07
Hosmer and Lemeshow r^2...: 0.113222
Cox and Snell r^2.........: 0.105647
Nagelkerke's pseudo r^2...: 0.168498
Sample size of AE DB......: 2423
Sample size of model......: 498
Missing data %............: 79.44697
- processing IPH
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ Age + Gender +
DiabetesStatus + BMI + MedHx_CVD, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) Age Gendermale DiabetesStatusDiabetes BMI MedHx_CVDyes
-1.99101 0.01776 0.74177 -0.50330 0.05039 0.34743
Degrees of Freedom: 497 Total (i.e. Null); 492 Residual
Null Deviance: 552.3
Residual Deviance: 530.6 AIC: 542.6
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.0930 0.4579 0.6185 0.7655 1.4577
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -44.157516 210.353643 -0.210 0.83373
currentDF[, PROTEIN] 0.085258 0.112450 0.758 0.44834
Age 0.013062 0.014168 0.922 0.35654
Gendermale 0.764000 0.235270 3.247 0.00116 **
ORdate_year 0.020756 0.104972 0.198 0.84326
Hypertension.compositeyes 0.238795 0.310574 0.769 0.44196
DiabetesStatusDiabetes -0.517784 0.264433 -1.958 0.05022 .
SmokerStatusEx-smoker -0.079166 0.246460 -0.321 0.74805
SmokerStatusNever smoked 0.053141 0.367384 0.145 0.88499
Med.Statin.LLDyes -0.087080 0.260840 -0.334 0.73850
Med.all.antiplateletyes -0.106642 0.399317 -0.267 0.78942
GFR_MDRD -0.005579 0.006259 -0.891 0.37278
BMI 0.050000 0.029351 1.704 0.08847 .
MedHx_CVDyes 0.344835 0.224565 1.536 0.12464
stenose50-70% 1.271129 1.377715 0.923 0.35620
stenose70-90% 1.170764 1.265627 0.925 0.35494
stenose90-99% 1.367189 1.262889 1.083 0.27899
stenose100% (Occlusion) 1.478726 1.723795 0.858 0.39099
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 552.26 on 497 degrees of freedom
Residual deviance: 526.41 on 480 degrees of freedom
AIC: 562.41
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' IPH ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: IPH
Effect size...............: 0.085258
Standard error............: 0.11245
Odds ratio (effect size)..: 1.089
Lower 95% CI..............: 0.874
Upper 95% CI..............: 1.358
Z-value...................: 0.758181
P-value...................: 0.4483429
Hosmer and Lemeshow r^2...: 0.046812
Cox and Snell r^2.........: 0.050589
Nagelkerke's pseudo r^2...: 0.075495
Sample size of AE DB......: 2423
Sample size of model......: 498
Missing data %............: 79.44697
- processing MAC_binned
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Gender + ORdate_year + Med.Statin.LLD + GFR_MDRD, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Gendermale ORdate_year Med.Statin.LLDyes GFR_MDRD
-762.20937 0.39750 0.32386 0.38053 0.51246 -0.00836
Degrees of Freedom: 493 Total (i.e. Null); 488 Residual
Null Deviance: 671.2
Residual Deviance: 630.4 AIC: 642.4
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.0233 -1.1555 0.7355 0.9909 1.5585
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -7.459e+02 5.422e+02 -1.376 0.168912
currentDF[, PROTEIN] 3.810e-01 1.016e-01 3.748 0.000178 ***
Age -1.768e-02 1.269e-02 -1.393 0.163596
Gendermale 3.458e-01 2.195e-01 1.575 0.115240
ORdate_year 3.798e-01 9.574e-02 3.967 7.28e-05 ***
Hypertension.compositeyes 5.242e-02 2.887e-01 0.182 0.855941
DiabetesStatusDiabetes -1.422e-01 2.464e-01 -0.577 0.563990
SmokerStatusEx-smoker 5.878e-02 2.183e-01 0.269 0.787743
SmokerStatusNever smoked 1.942e-01 3.266e-01 0.595 0.552050
Med.Statin.LLDyes 4.288e-01 2.239e-01 1.915 0.055488 .
Med.all.antiplateletyes -1.192e-01 3.515e-01 -0.339 0.734579
GFR_MDRD -9.966e-03 5.527e-03 -1.803 0.071365 .
BMI -3.345e-03 2.565e-02 -0.130 0.896243
MedHx_CVDyes 1.279e-01 2.039e-01 0.627 0.530510
stenose50-70% -1.355e+01 5.071e+02 -0.027 0.978679
stenose70-90% -1.325e+01 5.071e+02 -0.026 0.979153
stenose90-99% -1.355e+01 5.071e+02 -0.027 0.978687
stenose100% (Occlusion) -1.393e+01 5.071e+02 -0.027 0.978091
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 671.15 on 493 degrees of freedom
Residual deviance: 623.39 on 476 degrees of freedom
AIC: 659.39
Number of Fisher Scoring iterations: 13
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' MAC_binned ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: MAC_binned
Effect size...............: 0.380999
Standard error............: 0.101645
Odds ratio (effect size)..: 1.464
Lower 95% CI..............: 1.199
Upper 95% CI..............: 1.786
Z-value...................: 3.748326
P-value...................: 0.0001780186
Hosmer and Lemeshow r^2...: 0.071171
Cox and Snell r^2.........: 0.092166
Nagelkerke's pseudo r^2...: 0.124048
Sample size of AE DB......: 2423
Sample size of model......: 494
Missing data %............: 79.61205
- processing SMC_binned
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender, family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age Gendermale
3.58159 -0.46804 -0.03057 -0.73346
Degrees of Freedom: 495 Total (i.e. Null); 492 Residual
Null Deviance: 595.8
Residual Deviance: 558.3 AIC: 566.3
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3253 -1.1830 0.6182 0.8468 1.4395
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.655e+02 5.405e+02 -0.491 0.62331
currentDF[, PROTEIN] -4.611e-01 1.126e-01 -4.094 4.23e-05 ***
Age -3.691e-02 1.427e-02 -2.587 0.00969 **
Gendermale -8.251e-01 2.669e-01 -3.091 0.00199 **
ORdate_year 1.421e-01 1.019e-01 1.395 0.16311
Hypertension.compositeyes -3.209e-01 3.348e-01 -0.959 0.33780
DiabetesStatusDiabetes -2.069e-01 2.620e-01 -0.790 0.42963
SmokerStatusEx-smoker 2.049e-01 2.393e-01 0.856 0.39174
SmokerStatusNever smoked -1.393e-01 3.395e-01 -0.410 0.68158
Med.Statin.LLDyes -1.806e-01 2.464e-01 -0.733 0.46353
Med.all.antiplateletyes -8.168e-02 3.820e-01 -0.214 0.83070
GFR_MDRD -1.716e-03 5.923e-03 -0.290 0.77209
BMI -2.059e-02 2.927e-02 -0.703 0.48175
MedHx_CVDyes -1.088e-01 2.247e-01 -0.484 0.62815
stenose50-70% -1.382e+01 5.005e+02 -0.028 0.97797
stenose70-90% -1.404e+01 5.005e+02 -0.028 0.97762
stenose90-99% -1.391e+01 5.005e+02 -0.028 0.97782
stenose100% (Occlusion) -1.486e+01 5.005e+02 -0.030 0.97631
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 595.82 on 495 degrees of freedom
Residual deviance: 547.99 on 478 degrees of freedom
AIC: 583.99
Number of Fisher Scoring iterations: 13
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' SMC_binned ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: SMC_binned
Effect size...............: -0.461087
Standard error............: 0.112615
Odds ratio (effect size)..: 0.631
Lower 95% CI..............: 0.506
Upper 95% CI..............: 0.786
Z-value...................: -4.094379
P-value...................: 4.233022e-05
Hosmer and Lemeshow r^2...: 0.080275
Cox and Snell r^2.........: 0.091928
Nagelkerke's pseudo r^2...: 0.131479
Sample size of AE DB......: 2423
Sample size of model......: 496
Missing data %............: 79.52951
cat("Edit the column names...\n")Edit the column names...
# Label the 16 result columns: identifiers, effect estimates with 95% CI,
# test statistics, pseudo-r^2 measures, and sample-size bookkeeping.
names(GLM.results) <- c(
  "Dataset", "Predictor", "Trait",
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "Z-value", "P-value",
  "r^2_l", "r^2_cs", "r^2_nagelkerke",
  "AE_N", "Model_N", "Perc_Miss"
)
cat("Correct the variable types...\n")Correct the variable types...
# Every column except the three identifiers (Dataset, Predictor, Trait) is
# converted to numeric in a single pass.
numeric_columns <- c(
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "Z-value", "P-value",
  "r^2_l", "r^2_cs", "r^2_nagelkerke",
  "AE_N", "Model_N", "Perc_Miss"
)
GLM.results[numeric_columns] <- lapply(GLM.results[numeric_columns], as.numeric)
# Drop the helper so no extra object lingers in the workspace.
rm(numeric_columns)
# Save the data
cat("Writing results to Excel-file...\n")Writing results to Excel-file...
### Multivariable (MODEL2) results
# Write the results table to an Excel workbook in OUT_loc; the file name is
# prefixed with the `Today` stamp.
write.xlsx(
  GLM.results,
  file = file.path(
    OUT_loc,
    paste0(Today, ".AEDB.CEA.Bin.Multi.Protein.PlaquePhenotypes.RANK.MODEL2.xlsx")
  ),
  sheetName = "Bin.Multi.PlaquePheno",
  col.names = TRUE,
  row.names = FALSE
)
# Removing intermediates
cat("Removing intermediate files...\n")Removing intermediate files...
rm(TRAIT, trait, currentDF, GLM.results, GLM.results.TEMP, fit, model_step)
We will perform a cross-sectional analysis of the association between plaque MCP1 levels and the ‘clinical status’ of the plaque, defined by the presence of patients’ symptoms (symptomatic vs. asymptomatic). The symptoms of interest are:
In this model we correct for Age, Gender, and year of surgery.
# Model 1: logistic regression of symptom status (AsymptSympt) on each
# rank-transformed protein measure in TRAITS.PROTEIN.RANK, adjusted for Age,
# Gender and year of surgery (ORdate_year).
# One row per protein is appended to GLM.results (16 columns, filled by GLM.BIN).
GLM.results <- data.frame(matrix(NA, ncol = 16, nrow = 0))

# seq_along() rather than 1:length(): an empty protein vector then skips the
# loop instead of iterating over c(1, 0).
for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
  PROTEIN <- TRAITS.PROTEIN.RANK[protein]
  cat(paste0("\nAnalysis of ",PROTEIN,".\n"))
  TRAIT <- "AsymptSympt"
  cat(paste0("\n- processing ",TRAIT,"\n\n"))

  # Keep only the exposure, outcome and model-1 covariates, then drop rows with
  # missing values and rows with infinite values in any numeric column.
  # all_of() makes explicit that PROTEIN/TRAIT/COVARIATES_M1 are external
  # vectors of column selections, so a data column that happened to be called
  # "PROTEIN" or "TRAIT" cannot be picked up by mistake.
  currentDF <- as.data.frame(AEDB.CEA %>%
      dplyr::select(., dplyr::all_of(PROTEIN), dplyr::all_of(TRAIT), dplyr::all_of(COVARIATES_M1)) %>%
      filter(complete.cases(.))) %>%
    filter_if(~is.numeric(.), all_vars(!is.infinite(.)))

  # for debug
  # print(DT::datatable(currentDF))
  # print(nrow(currentDF))
  # print(str(currentDF))
  # print(class(currentDF[,TRAIT]))

  ### model 1 (adjusted for Age, Gender, ORdate_year)
  # Covariates deliberately left out of this model:
  # + Hypertension.composite + DiabetesStatus + SmokerCurrent +
  # Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI +
  # CAD_history + Stroke_history + Peripheral.interv + stenose
  fit <- glm(as.factor(currentDF[,TRAIT]) ~ currentDF[,PROTEIN] + Age + Gender + ORdate_year,
             data = currentDF, family = binomial(link = "logit"))

  # The stepwise-AIC model is printed for reference only; the statistics
  # collected below come from the full model `fit`.
  model_step <- stepAIC(fit, direction = "both", trace = FALSE)
  print(model_step)
  print(summary(fit))

  # Collect the summary for this protein and append it to the results table.
  GLM.results.TEMP <- data.frame(matrix(NA, ncol = 16, nrow = 0))
  GLM.results.TEMP[1,] <- GLM.BIN(fit, "AEDB.CEA", PROTEIN, TRAIT, verbose = TRUE)
  GLM.results <- rbind(GLM.results, GLM.results.TEMP)
}
Analysis of MCP1_pg_ml_2015_rank.
- processing AsymptSympt
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age Gendermale ORdate_year
-124.83943 0.27031 0.02877 -0.51749 0.06249
Degrees of Freedom: 1198 Total (i.e. Null); 1194 Residual
Null Deviance: 827.2
Residual Deviance: 797.3 AIC: 807.3
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.5524 0.3495 0.4339 0.5243 0.8965
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -124.83943 68.66012 -1.818 0.06903 .
currentDF[, PROTEIN] 0.27031 0.10237 2.641 0.00828 **
Age 0.02877 0.01023 2.811 0.00493 **
Gendermale -0.51749 0.22116 -2.340 0.01929 *
ORdate_year 0.06249 0.03424 1.825 0.06796 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 827.22 on 1198 degrees of freedom
Residual deviance: 797.31 on 1194 degrees of freedom
AIC: 807.31
Number of Fisher Scoring iterations: 5
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' AsymptSympt ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: AsymptSympt
Effect size...............: 0.270307
Standard error............: 0.102367
Odds ratio (effect size)..: 1.31
Lower 95% CI..............: 1.072
Upper 95% CI..............: 1.602
Z-value...................: 2.640561
P-value...................: 0.008276887
Hosmer and Lemeshow r^2...: 0.036146
Cox and Snell r^2.........: 0.02463
Nagelkerke's pseudo r^2...: 0.049419
Sample size of AE DB......: 2423
Sample size of model......: 1199
Missing data %............: 50.51589
Analysis of MCP1_rank.
- processing AsymptSympt
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
ORdate_year, family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] ORdate_year
-473.3895 0.3371 0.2371
Degrees of Freedom: 555 Total (i.e. Null); 553 Residual
Null Deviance: 479
Residual Deviance: 468.7 AIC: 474.7
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3911 0.4414 0.5340 0.6219 1.0452
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -480.62215 225.42835 -2.132 0.03300 *
currentDF[, PROTEIN] 0.36235 0.12346 2.935 0.00334 **
Age 0.01562 0.01370 1.140 0.25440
Gendermale -0.29174 0.27407 -1.064 0.28711
ORdate_year 0.24030 0.11253 2.135 0.03272 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 478.98 on 555 degrees of freedom
Residual deviance: 466.39 on 551 degrees of freedom
AIC: 476.39
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' AsymptSympt ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: AsymptSympt
Effect size...............: 0.362354
Standard error............: 0.123463
Odds ratio (effect size)..: 1.437
Lower 95% CI..............: 1.128
Upper 95% CI..............: 1.83
Z-value...................: 2.934919
P-value...................: 0.003336347
Hosmer and Lemeshow r^2...: 0.026279
Cox and Snell r^2.........: 0.022385
Nagelkerke's pseudo r^2...: 0.038764
Sample size of AE DB......: 2423
Sample size of model......: 556
Missing data %............: 77.05324
cat("Edit the column names...\n")Edit the column names...
# Label the 16 result columns: identifiers, effect estimates with confidence
# limits, test statistics, pseudo r^2 measures, and sample-size information.
names(GLM.results) <- c(
  "Dataset", "Predictor", "Trait",
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "Z-value", "P-value",
  "r^2_l", "r^2_cs", "r^2_nagelkerke",
  "AE_N", "Model_N", "Perc_Miss"
)
cat("Correct the variable types...\n")Correct the variable types...
# Convert every column after the three identifier columns (Dataset, Predictor,
# Trait) to numeric, i.e. Beta, s.e.m., OR, low95CI, up95CI, Z-value, P-value,
# r^2_l, r^2_cs, r^2_nagelkerke, AE_N, Model_N and Perc_Miss.
GLM.results[-(1:3)] <- lapply(GLM.results[-(1:3)], as.numeric)
# Save the data
cat("Writing results to Excel-file...\n")Writing results to Excel-file...
### Model 1 (file and sheet are labelled 'Uni')
# Write the collected GLM.results to a dated Excel workbook in OUT_loc,
# without row names, on a sheet called 'Bin.Uni.Symptoms'.
write.xlsx(GLM.results,
file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Bin.Uni.Protein.RANK.Symptoms.MODEL1.xlsx"),
row.names = FALSE, col.names = TRUE, sheetName = "Bin.Uni.Symptoms")
# Removing intermediates
cat("Removing intermediate files...\n")Removing intermediate files...
rm(TRAIT, currentDF, GLM.results, GLM.results.TEMP, fit, model_step)
In this model we correct for Age, Gender, year of surgery (ORdate_year), Hypertension status, Diabetes status, current smoker status, lipid-lowering drugs (LLDs), antiplatelet medication, eGFR (MDRD), BMI, MedHx_CVD (combination of CAD history, stroke history, and peripheral interventions), and stenosis.
# MODEL 2: multivariable logistic regression of symptom status (AsymptSympt)
# on each protein named in TRAITS.PROTEIN.RANK, adjusted for the covariates
# written out in the glm() formula below. One row per protein is appended to
# GLM.results (16 unnamed columns at this point).
GLM.results <- data.frame(matrix(NA, ncol = 16, nrow = 0))
# seq_along() rather than 1:length(): with an empty TRAITS.PROTEIN.RANK the
# loop body is skipped instead of being run for the indices 1 and 0.
for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
  PROTEIN = TRAITS.PROTEIN.RANK[protein]
  cat(paste0("\nAnalysis of ",PROTEIN,".\n"))
  TRAIT = "AsymptSympt"
  cat(paste0("\n- processing ",TRAIT,"\n\n"))
  # Keep the protein, the outcome and the MODEL 2 covariates; drop rows with
  # any missing value, then rows with an infinite value in a numeric column.
  # NOTE(review): PROTEIN/TRAIT (and presumably COVARIATES_M2) are character
  # vectors used as external variables inside select(); a data column that is
  # literally called "PROTEIN" or "TRAIT" would be picked instead - confirm
  # that no such column exists in AEDB.CEA.
  currentDF <- as.data.frame(AEDB.CEA %>%
    dplyr::select(., PROTEIN, TRAIT, COVARIATES_M2) %>%
    filter(complete.cases(.))) %>%
    filter_if(~is.numeric(.), all_vars(!is.infinite(.)))
  # for debug
  # print(DT::datatable(currentDF))
  # print(nrow(currentDF))
  # print(str(currentDF))
  # print(class(currentDF[,TRAIT]))
  ### multivariable (MODEL 2)
  fit <- glm(as.factor(currentDF[,TRAIT]) ~ currentDF[,PROTEIN] + Age + Gender + ORdate_year +
               Hypertension.composite + DiabetesStatus + SmokerStatus +
               Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI +
               MedHx_CVD + stenose,
             data = currentDF, family = binomial(link = "logit"))
  # The stepwise-AIC model is only printed for reference; the statistics that
  # are stored below are taken from the full model in 'fit'.
  model_step <- stepAIC(fit, direction = "both", trace = FALSE)
  print(model_step)
  print(summary(fit))
  # Collect the statistics for this protein in a one-row frame and append it.
  GLM.results.TEMP <- data.frame(matrix(NA, ncol = 16, nrow = 0))
  GLM.results.TEMP[1,] = GLM.BIN(fit, "AEDB.CEA", PROTEIN, TRAIT, verbose = TRUE)
  GLM.results = rbind(GLM.results, GLM.results.TEMP)
}
Analysis of MCP1_pg_ml_2015_rank.
- processing AsymptSympt
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Med.all.antiplatelet + stenose,
family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age Gendermale ORdate_year
-145.03274 0.32511 0.02147 -0.48726 0.08040
Med.all.antiplateletyes stenose50-70% stenose70-90% stenose90-99% stenose100% (Occlusion)
-0.91400 -13.07895 -14.66964 -14.29506 0.03649
stenose50-99% stenose70-99%
-15.84146 -0.73998
Degrees of Freedom: 1037 Total (i.e. Null); 1026 Residual
Null Deviance: 726.9
Residual Deviance: 679.9 AIC: 703.9
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.2614 0.2818 0.4201 0.5408 1.0245
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.486e+02 9.514e+02 -0.156 0.87591
currentDF[, PROTEIN] 3.063e-01 1.122e-01 2.729 0.00636 **
Age 2.888e-02 1.278e-02 2.260 0.02385 *
Gendermale -4.336e-01 2.409e-01 -1.800 0.07187 .
ORdate_year 8.214e-02 3.946e-02 2.082 0.03739 *
Hypertension.compositeyes -3.362e-01 3.458e-01 -0.972 0.33083
DiabetesStatusDiabetes -4.766e-02 2.441e-01 -0.195 0.84521
SmokerStatusEx-smoker -3.345e-01 2.345e-01 -1.426 0.15373
SmokerStatusNever smoked -2.811e-03 3.574e-01 -0.008 0.99372
Med.Statin.LLDyes -2.461e-01 2.688e-01 -0.916 0.35983
Med.all.antiplateletyes -9.270e-01 4.806e-01 -1.929 0.05372 .
GFR_MDRD 6.238e-03 5.532e-03 1.128 0.25950
BMI -8.706e-03 2.805e-02 -0.310 0.75628
MedHx_CVDyes 9.157e-02 2.110e-01 0.434 0.66436
stenose50-70% -1.317e+01 9.481e+02 -0.014 0.98891
stenose70-90% -1.473e+01 9.481e+02 -0.016 0.98760
stenose90-99% -1.437e+01 9.481e+02 -0.015 0.98791
stenose100% (Occlusion) -1.476e-01 1.228e+03 0.000 0.99990
stenose50-99% -1.613e+01 9.481e+02 -0.017 0.98642
stenose70-99% -7.880e-01 1.183e+03 -0.001 0.99947
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 726.94 on 1037 degrees of freedom
Residual deviance: 673.02 on 1018 degrees of freedom
AIC: 713.02
Number of Fisher Scoring iterations: 15
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ml_2015_rank ' with ' AsymptSympt ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ml_2015_rank
Trait/outcome.............: AsymptSympt
Effect size...............: 0.306293
Standard error............: 0.112242
Odds ratio (effect size)..: 1.358
Lower 95% CI..............: 1.09
Upper 95% CI..............: 1.693
Z-value...................: 2.728861
P-value...................: 0.006355351
Hosmer and Lemeshow r^2...: 0.074179
Cox and Snell r^2.........: 0.050624
Nagelkerke's pseudo r^2...: 0.100528
Sample size of AE DB......: 2423
Sample size of model......: 1038
Missing data %............: 57.16054
Analysis of MCP1_rank.
- processing AsymptSympt
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
ORdate_year + Med.Statin.LLD, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] ORdate_year Med.Statin.LLDyes
-529.0018 0.3015 0.2650 -0.4436
Degrees of Freedom: 497 Total (i.e. Null); 494 Residual
Null Deviance: 442.3
Residual Deviance: 431.4 AIC: 439.4
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.4505 0.3657 0.5162 0.6508 1.2570
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -5.467e+02 1.385e+03 -0.395 0.69306
currentDF[, PROTEIN] 3.487e-01 1.293e-01 2.697 0.00701 **
Age 1.924e-02 1.635e-02 1.177 0.23928
Gendermale -3.424e-01 2.977e-01 -1.150 0.25001
ORdate_year 2.808e-01 1.220e-01 2.301 0.02139 *
Hypertension.compositeyes -5.366e-01 4.410e-01 -1.217 0.22366
DiabetesStatusDiabetes 1.954e-01 3.251e-01 0.601 0.54780
SmokerStatusEx-smoker -1.745e-01 2.861e-01 -0.610 0.54197
SmokerStatusNever smoked -4.281e-01 4.090e-01 -1.047 0.29516
Med.Statin.LLDyes -3.682e-01 3.131e-01 -1.176 0.23961
Med.all.antiplateletyes -4.933e-01 5.140e-01 -0.960 0.33722
GFR_MDRD 9.396e-03 7.081e-03 1.327 0.18455
BMI 1.197e-02 3.425e-02 0.349 0.72676
MedHx_CVDyes 8.287e-02 2.647e-01 0.313 0.75424
stenose50-70% -1.392e+01 1.363e+03 -0.010 0.99185
stenose70-90% -1.531e+01 1.363e+03 -0.011 0.99104
stenose90-99% -1.494e+01 1.363e+03 -0.011 0.99126
stenose100% (Occlusion) -8.723e-02 1.712e+03 0.000 0.99996
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 442.26 on 497 degrees of freedom
Residual deviance: 417.29 on 480 degrees of freedom
AIC: 453.29
Number of Fisher Scoring iterations: 15
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_rank ' with ' AsymptSympt ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_rank
Trait/outcome.............: AsymptSympt
Effect size...............: 0.348715
Standard error............: 0.129319
Odds ratio (effect size)..: 1.417
Lower 95% CI..............: 1.1
Upper 95% CI..............: 1.826
Z-value...................: 2.696545
P-value...................: 0.007006285
Hosmer and Lemeshow r^2...: 0.056471
Cox and Snell r^2.........: 0.048913
Nagelkerke's pseudo r^2...: 0.083108
Sample size of AE DB......: 2423
Sample size of model......: 498
Missing data %............: 79.44697
cat("Edit the column names...\n")Edit the column names...
# Label the 16 result columns: identifiers, effect estimates with confidence
# limits, test statistics, pseudo r^2 measures, and sample-size information.
names(GLM.results) <- c(
  "Dataset", "Predictor", "Trait",
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "Z-value", "P-value",
  "r^2_l", "r^2_cs", "r^2_nagelkerke",
  "AE_N", "Model_N", "Perc_Miss"
)
cat("Correct the variable types...\n")Correct the variable types...
# Convert every column after the three identifier columns (Dataset, Predictor,
# Trait) to numeric, i.e. Beta, s.e.m., OR, low95CI, up95CI, Z-value, P-value,
# r^2_l, r^2_cs, r^2_nagelkerke, AE_N, Model_N and Perc_Miss.
GLM.results[-(1:3)] <- lapply(GLM.results[-(1:3)], as.numeric)
# Save the data
cat("Writing results to Excel-file...\n")Writing results to Excel-file...
### Multivariable (MODEL2)
# Write the collected GLM.results to a dated Excel workbook in OUT_loc,
# without row names, on a sheet called 'Bin.Multi.Symptoms'.
write.xlsx(GLM.results,
file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Bin.Multi.Protein.RANK.Symptoms.MODEL2.xlsx"),
row.names = FALSE, col.names = TRUE, sheetName = "Bin.Multi.Symptoms")
# Removing intermediates
cat("Removing intermediate files...\n")Removing intermediate files...
rm(TRAIT, currentDF, GLM.results, GLM.results.TEMP, fit, model_step)
Next, we perform longitudinal analyses of plaque MCP1 levels and secondary cardiovascular events over a three-year follow-up period.
The primary outcome is defined as “a composite of fatal or non-fatal myocardial infarction, fatal or non-fatal stroke, ruptured aortic aneurysm, fatal cardiac failure, coronary or peripheral interventions, leg amputation due to vascular causes, and cardiovascular death”, i.e. major adverse cardiovascular events (MACE). The corresponding variable is epmajor.3years, which includes:
- myocardial infarction (MI)
- cerebral infarction (CVA/stroke)
- cardiovascular death (exact cause to be investigated)
- cerebral bleeding (CVA/stroke)
- fatal myocardial infarction (MI)
- fatal cerebral infarction
- fatal cerebral bleeding
- sudden death
- fatal heart failure
- fatal aneurysm rupture
- other cardiovascular death
The secondary outcomes will be
epstroke.3years, these include:
epcoronary.3years, these include:
epcvdeath.3years, these include:
We will use 3-year follow-up, but we will also calculate 30 days and 90 days follow-up ‘time-to-event’ variables. On average there are 365.25 days in a year. We can calculate 30-days and 90-days follow-up time based on the three years follow-up.
# Cut-offs for 30 and 90 days, expressed as a fraction of a year of 365.25 days.
cutt.off.30days = (1/365.25) * 30
cutt.off.90days = (1/365.25) * 90
# Fix maximum FU of 30 and 90 days
# Rows whose max.followup is at or below the cut-off keep their own value;
# all other rows are capped at the cut-off.
AEDB <- AEDB %>%
mutate(
FU.cutt.off.30days = ifelse(max.followup <= cutt.off.30days, max.followup, cutt.off.30days),
FU.cutt.off.90days = ifelse(max.followup <= cutt.off.90days, max.followup, cutt.off.90days)
)
# Temporary subset, used only to display the first 10 rows as a table.
AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary",
"max.followup",
"FU.cutt.off.3years",
"FU.cutt.off.30days",
"FU.cutt.off.90days"))
# NOTE(review): require() only warns and returns FALSE when the package is
# missing; to_factor() (presumably from 'labelled') would then fail below.
require(labelled)
AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
rm(AEDB.temp)
# Same capping of the follow-up time, now for the AEDB.CEA data.
AEDB.CEA <- AEDB.CEA %>%
mutate(
FU.cutt.off.30days = ifelse(max.followup <= cutt.off.30days, max.followup, cutt.off.30days),
FU.cutt.off.90days = ifelse(max.followup <= cutt.off.90days, max.followup, cutt.off.90days)
)
# Temporary subset, used only to display the first 10 rows as a table.
AEDB.CEA.temp <- subset(AEDB.CEA, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary",
"max.followup",
"FU.cutt.off.3years",
"FU.cutt.off.30days",
"FU.cutt.off.90days"))
require(labelled)
AEDB.CEA.temp$Gender <- to_factor(AEDB.CEA.temp$Gender)
AEDB.CEA.temp$Hospital <- to_factor(AEDB.CEA.temp$Hospital)
AEDB.CEA.temp$Artery_summary <- to_factor(AEDB.CEA.temp$Artery_summary)
DT::datatable(AEDB.CEA.temp[1:10,], caption = "Excerpt of the whole AEDB.CEA.", rownames = FALSE)
rm(AEDB.CEA.temp)
Here we will calculate the new 30- and 90-day follow-up versions of the events of interest and their event times:
- major adverse cardiovascular events (epmajor.3years)
- stroke (epstroke.3years)
- coronary events (epcoronary.3years)
- cardiovascular death (epcvdeath.3years)
avg_days_in_year = 365.25
# Convert the cut-offs from fractions of a year back to days.
cutt.off.30days.scaled <- cutt.off.30days * 365.25
cutt.off.90days.scaled <- cutt.off.90days * 365.25
# Event times
# Each 3-year event time is multiplied by avg_days_in_year and then capped at
# the scaled cut-off, so the new *_t_30days / *_t_90days columns are on the
# scaled (day) scale, whereas the *_t_3years columns are left untouched.
AEDB <- AEDB %>%
mutate(
# 30-day event times
ep_major_t_30days = ifelse(ep_major_t_3years * avg_days_in_year <= cutt.off.30days.scaled,
ep_major_t_3years * avg_days_in_year, cutt.off.30days.scaled),
ep_stroke_t_30days = ifelse(ep_stroke_t_3years * avg_days_in_year <= cutt.off.30days.scaled,
ep_stroke_t_3years * avg_days_in_year, cutt.off.30days.scaled),
ep_coronary_t_30days = ifelse(ep_coronary_t_3years * avg_days_in_year <= cutt.off.30days.scaled,
ep_coronary_t_3years * avg_days_in_year, cutt.off.30days.scaled),
ep_cvdeath_t_30days = ifelse(ep_cvdeath_t_3years * avg_days_in_year <= cutt.off.30days.scaled,
ep_cvdeath_t_3years * avg_days_in_year, cutt.off.30days.scaled),
# 90-day event times
ep_major_t_90days = ifelse(ep_major_t_3years * avg_days_in_year <= cutt.off.90days.scaled,
ep_major_t_3years * avg_days_in_year, cutt.off.90days.scaled),
ep_stroke_t_90days = ifelse(ep_stroke_t_3years * avg_days_in_year <= cutt.off.90days.scaled,
ep_stroke_t_3years * avg_days_in_year, cutt.off.90days.scaled),
ep_coronary_t_90days = ifelse(ep_coronary_t_3years * avg_days_in_year <= cutt.off.90days.scaled,
ep_coronary_t_3years * avg_days_in_year, cutt.off.90days.scaled),
ep_cvdeath_t_90days = ifelse(ep_cvdeath_t_3years * avg_days_in_year <= cutt.off.90days.scaled,
ep_cvdeath_t_3years * avg_days_in_year, cutt.off.90days.scaled)
)
# Same derivation of the capped event times for the AEDB.CEA data.
AEDB.CEA <- AEDB.CEA %>%
mutate(
# 30-day event times
ep_major_t_30days = ifelse(ep_major_t_3years * avg_days_in_year <= cutt.off.30days.scaled,
ep_major_t_3years * avg_days_in_year, cutt.off.30days.scaled),
ep_stroke_t_30days = ifelse(ep_stroke_t_3years * avg_days_in_year <= cutt.off.30days.scaled,
ep_stroke_t_3years * avg_days_in_year, cutt.off.30days.scaled),
ep_coronary_t_30days = ifelse(ep_coronary_t_3years * avg_days_in_year <= cutt.off.30days.scaled,
ep_coronary_t_3years * avg_days_in_year, cutt.off.30days.scaled),
ep_cvdeath_t_30days = ifelse(ep_cvdeath_t_3years * avg_days_in_year <= cutt.off.30days.scaled,
ep_cvdeath_t_3years * avg_days_in_year, cutt.off.30days.scaled),
# 90-day event times
ep_major_t_90days = ifelse(ep_major_t_3years * avg_days_in_year <= cutt.off.90days.scaled,
ep_major_t_3years * avg_days_in_year, cutt.off.90days.scaled),
ep_stroke_t_90days = ifelse(ep_stroke_t_3years * avg_days_in_year <= cutt.off.90days.scaled,
ep_stroke_t_3years * avg_days_in_year, cutt.off.90days.scaled),
ep_coronary_t_90days = ifelse(ep_coronary_t_3years * avg_days_in_year <= cutt.off.90days.scaled,
ep_coronary_t_3years * avg_days_in_year, cutt.off.90days.scaled),
ep_cvdeath_t_90days = ifelse(ep_cvdeath_t_3years * avg_days_in_year <= cutt.off.90days.scaled,
ep_cvdeath_t_3years * avg_days_in_year, cutt.off.90days.scaled)
)
# Derive the 30- and 90-day event indicators for AEDB from the 3-year ones.
# Each new column starts as a copy of the 3-year indicator; an event (== 1)
# whose 3-year event time lies beyond the cut-off is reset to 0.
# Columns are read explicitly from AEDB instead of via attach(): an attached
# data frame is searched after the global environment, so a global variable
# that happens to share a column name would silently be used instead.
for (fu_window in c("30days", "90days")) {
  # The cut-offs are fractions of a year and are compared with the 3-year
  # event times directly.
  fu_cutoff <- if (fu_window == "30days") cutt.off.30days else cutt.off.90days
  for (ep_stub in c("major", "stroke", "coronary", "cvdeath")) {
    ep_3years <- AEDB[[paste0("ep", ep_stub, ".3years")]]
    ep_time_3years <- AEDB[[paste0("ep_", ep_stub, "_t_3years")]]
    ep_window <- paste0("ep", ep_stub, ".", fu_window)
    # which() ignores rows where the comparison is NA, so those rows keep the
    # value of the 3-year indicator (as with the original logical index).
    late_event <- which(ep_3years == 1 & ep_time_3years > fu_cutoff)
    AEDB[[ep_window]] <- ep_3years
    AEDB[[ep_window]][late_event] <- 0
  }
}
rm(fu_window, fu_cutoff, ep_stub, ep_3years, ep_time_3years, ep_window, late_event)
# Preview table: identifiers, basic characteristics, and the 3-year, 30-day
# and 90-day event indicators of the first rows of AEDB.
AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary",
                                     "epmajor.3years", "epstroke.3years", "epcoronary.3years", "epcvdeath.3years",
                                     "epmajor.30days", "epstroke.30days", "epcoronary.30days", "epcvdeath.30days",
                                     "epmajor.90days", "epstroke.90days", "epcoronary.90days", "epcvdeath.90days"))
# library() stops with an error when the package is missing; require() would
# only warn and let the to_factor() calls fail later.
library(labelled)
AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# head() shows at most 10 rows and never pads with all-NA rows when the data
# has fewer than 10 rows.
DT::datatable(head(AEDB.temp, 10), caption = "Excerpt of the whole AEDB.", rownames = FALSE)
rm(AEDB.temp)
# Derive the 30- and 90-day event indicators for AEDB.CEA from the 3-year ones.
# Each new column starts as a copy of the 3-year indicator; an event (== 1)
# whose 3-year event time lies beyond the cut-off is reset to 0.
# Columns are read explicitly from AEDB.CEA instead of via attach(): an
# attached data frame is searched after the global environment, so a global
# variable that happens to share a column name would silently be used instead.
for (fu_window in c("30days", "90days")) {
  # The cut-offs are fractions of a year and are compared with the 3-year
  # event times directly.
  fu_cutoff <- if (fu_window == "30days") cutt.off.30days else cutt.off.90days
  for (ep_stub in c("major", "stroke", "coronary", "cvdeath")) {
    ep_3years <- AEDB.CEA[[paste0("ep", ep_stub, ".3years")]]
    ep_time_3years <- AEDB.CEA[[paste0("ep_", ep_stub, "_t_3years")]]
    ep_window <- paste0("ep", ep_stub, ".", fu_window)
    # which() ignores rows where the comparison is NA, so those rows keep the
    # value of the 3-year indicator (as with the original logical index).
    late_event <- which(ep_3years == 1 & ep_time_3years > fu_cutoff)
    AEDB.CEA[[ep_window]] <- ep_3years
    AEDB.CEA[[ep_window]][late_event] <- 0
  }
}
rm(fu_window, fu_cutoff, ep_stub, ep_3years, ep_time_3years, ep_window, late_event)
# Preview table: identifiers, basic characteristics, and the 3-year, 30-day
# and 90-day event indicators of the first rows of AEDB.CEA.
AEDB.CEA.temp <- subset(AEDB.CEA, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary",
                                             "epmajor.3years", "epstroke.3years", "epcoronary.3years", "epcvdeath.3years",
                                             "epmajor.30days", "epstroke.30days", "epcoronary.30days", "epcvdeath.30days",
                                             "epmajor.90days", "epstroke.90days", "epcoronary.90days", "epcvdeath.90days"))
# library() stops with an error when the package is missing; require() would
# only warn and let the to_factor() calls fail later.
library(labelled)
AEDB.CEA.temp$Gender <- to_factor(AEDB.CEA.temp$Gender)
AEDB.CEA.temp$Hospital <- to_factor(AEDB.CEA.temp$Hospital)
AEDB.CEA.temp$Artery_summary <- to_factor(AEDB.CEA.temp$Artery_summary)
# head() shows at most 10 rows and never pads with all-NA rows when the data
# has fewer than 10 rows.
DT::datatable(head(AEDB.CEA.temp, 10), caption = "Excerpt of the whole AEDB.CEA.", rownames = FALSE)
rm(AEDB.CEA.temp)
First, we do some sanity checks and take inventory of the time-to-event and event variables.
# Reference: https://bioconductor.org/packages/devel/bioc/vignettes/MultiAssayExperiment/inst/doc/QuickStartMultiAssay.html
# If you want to suppress warnings and messages when installing/loading packages
# suppressPackageStartupMessages({})
install.packages.auto("survival")
install.packages.auto("survminer")
install.packages.auto("Hmisc")
cat("* Creating function to summarize Cox regression and prepare container for results.")* Creating function to summarize Cox regression and prepare container for results.
# Function to get summary statistics from Cox regression model
#
# Arguments:
#   coxfit  - fitted model; summary(coxfit) must provide $coefficients, $n and
#             $nevent (presumably a survival::coxph fit - confirm with callers).
#   DATASET - label of the dataset analysed; stored in the result.
#   OUTCOME - label of the endpoint analysed; stored in the result.
#   protein - label of the predictor of interest. Row 1 of the coefficient
#             table is reported, so this predictor must be the first model term.
#
# Returns:
#   A vector of length 12 (character, because labels and numbers are combined
#   with c()): DATASET, OUTCOME, protein, effect size, standard error, hazard
#   ratio, lower and upper 95% CI, z-value, p-value, sample size and number of
#   events. When the model is flagged as not fitted, elements 4-12 are NA.
COX.STAT <- function(coxfit, DATASET, OUTCOME, protein){
  cat("Summarizing Cox regression results for '", protein ,"' and its association to '",OUTCOME,"' in '",DATASET,"'.\n")
  cox.sum <- summary(coxfit)
  # NOTE(review): a Cox model has no intercept, so a valid model with a single
  # covariate also has exactly one coefficient row and ends up in this branch.
  # The check is kept as it was; confirm it is what is intended.
  if (nrow(cox.sum$coefficients) == 1) {
    # Same 12-element layout as the fitted branch, so the result can always be
    # stored in the same 12-column results table.
    output = c(DATASET, OUTCOME, protein, rep(NA, 9))
    cat("Model not fitted; probably singular.\n")
  } else {
    cat("Collecting data.\n\n")
    # Assumes the standard coxph coefficient table layout: coef, exp(coef),
    # se(coef), z, p. A fit with robust standard errors adds a column and
    # would shift z and p - TODO confirm no robust fits are passed in.
    cox.effectsize = cox.sum$coefficients[1,1]
    cox.SE = cox.sum$coefficients[1,3]
    cox.HReffect = cox.sum$coefficients[1,2]
    cox.CI_low = exp(cox.effectsize - 1.96 * cox.SE)
    cox.CI_up = exp(cox.effectsize + 1.96 * cox.SE)
    cox.zvalue = cox.sum$coefficients[1,4]
    cox.pvalue = cox.sum$coefficients[1,5]
    cox.sample_size = cox.sum$n
    cox.nevents = cox.sum$nevent
    output = c(DATASET, OUTCOME, protein, cox.effectsize, cox.SE, cox.HReffect, cox.CI_low, cox.CI_up, cox.zvalue, cox.pvalue, cox.sample_size, cox.nevents)
    cat("We have collected the following:\n")
    cat("Dataset used..............:", DATASET, "\n")
    cat("Outcome analyzed..........:", OUTCOME, "\n")
    cat("Protein...................:", protein, "\n")
    cat("Effect size...............:", round(cox.effectsize, 6), "\n")
    cat("Standard error............:", round(cox.SE, 6), "\n")
    # exp(coef) of a Cox model is a hazard ratio, and the test statistic is a
    # z-value; the labels now say so.
    cat("Hazard ratio (effect size):", round(cox.HReffect, 3), "\n")
    cat("Lower 95% CI..............:", round(cox.CI_low, 3), "\n")
    cat("Upper 95% CI..............:", round(cox.CI_up, 3), "\n")
    cat("Z-value...................:", round(cox.zvalue, 6), "\n")
    cat("P-value...................:", signif(cox.pvalue, 8), "\n")
    cat("Sample size in model......:", cox.sample_size, "\n")
    cat("Number of events..........:", cox.nevents, "\n")
  }
  return(output)
}
# Names of the 3-year time-to-event columns and, in matching order, of the
# corresponding 3-year event indicator columns.
times <- c(
  "ep_major_t_3years",
  "ep_stroke_t_3years",
  "ep_coronary_t_3years",
  "ep_cvdeath_t_3years"
)
endpoints <- c(
  "epmajor.3years",
  "epstroke.3years",
  "epcoronary.3years",
  "epcvdeath.3years"
)
cat("* Check the cases per event type - for sanity.")* Check the cases per event type - for sanity.
for (events in endpoints){
require(labelled)
print(paste0("Printing the summary of: ",events))
# print(summary(AEDB.CEA[,events]))
print(table(AEDB.CEA[,events]))
}[1] "Printing the summary of: epmajor.3years"
0 1
2035 265
[1] "Printing the summary of: epstroke.3years"
0 1
2171 130
[1] "Printing the summary of: epcoronary.3years"
0 1
2119 182
[1] "Printing the summary of: epcvdeath.3years"
0 1
2210 90
cat("* Check distribution of events over time - for sanity.")* Check distribution of events over time - for sanity.
for (eventtimes in times){
print(paste0("Printing the summary of: ",eventtimes))
print(summary(AEDB.CEA[,eventtimes]))
}[1] "Printing the summary of: ep_major_t_3years"
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.000 2.710 3.000 2.573 3.000 3.000 125
[1] "Printing the summary of: ep_stroke_t_3years"
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.000 2.877 3.000 2.624 3.000 3.000 125
[1] "Printing the summary of: ep_coronary_t_3years"
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.000 2.784 3.000 2.622 3.000 3.000 125
[1] "Printing the summary of: ep_cvdeath_t_3years"
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00274 2.91233 3.00000 2.70902 3.00000 3.00000 125
for (eventtime in times){
print(paste0("Printing the distribution of: ",eventtime))
p <- gghistogram(AEDB.CEA, x = eventtime, y = "..count..",
main = eventtime, bins = 15,
xlab = "year", color = uithof_color[16], fill = uithof_color[16], ggtheme = theme_minimal())
print(p)
ggsave(file = paste0(QC_loc, "/",Today,".AEDB.CEA.EventDistributionPerYear.",eventtime,".pdf"), plot = last_plot())
}[1] "Printing the distribution of: ep_major_t_3years"
[1] "Printing the distribution of: ep_stroke_t_3years"
[1] "Printing the distribution of: ep_coronary_t_3years"
[1] "Printing the distribution of: ep_cvdeath_t_3years"
# Names of the 30-day time-to-event columns and, in matching order, of the
# corresponding 30-day event indicator columns.
times30 <- c(
  "ep_major_t_30days",
  "ep_stroke_t_30days",
  "ep_coronary_t_30days",
  "ep_cvdeath_t_30days"
)
endpoints30 <- c(
  "epmajor.30days",
  "epstroke.30days",
  "epcoronary.30days",
  "epcvdeath.30days"
)
cat("* Check the cases per event type - for sanity.")* Check the cases per event type - for sanity.
for (events in endpoints30){
print(paste0("Printing the summary of: ",events))
# print(summary(AEDB.CEA[,events]))
print(table(AEDB.CEA[,events]))
}[1] "Printing the summary of: epmajor.30days"
0 1
2222 78
[1] "Printing the summary of: epstroke.30days"
0 1
2248 53
[1] "Printing the summary of: epcoronary.30days"
0 1
2267 34
[1] "Printing the summary of: epcvdeath.30days"
0 1
2288 12
cat("* Check distribution of events over time - for sanity.")* Check distribution of events over time - for sanity.
for (eventtimes in times30){
print(paste0("Printing the summary of: ",eventtimes))
print(summary(AEDB.CEA[,eventtimes]))
}[1] "Printing the summary of: ep_major_t_30days"
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00 30.00 30.00 29.09 30.00 30.00 125
[1] "Printing the summary of: ep_stroke_t_30days"
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00 30.00 30.00 29.32 30.00 30.00 125
[1] "Printing the summary of: ep_coronary_t_30days"
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00 30.00 30.00 29.54 30.00 30.00 125
[1] "Printing the summary of: ep_cvdeath_t_30days"
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
1.001 30.000 30.000 29.854 30.000 30.000 125
for (eventtime in times30){
print(paste0("Printing the distribution of: ",eventtime))
p <- gghistogram(AEDB.CEA, x = eventtime, y = "..count..",
main = eventtime, bins = 15,
xlab = "days", color = uithof_color[16], fill = uithof_color[16], ggtheme = theme_minimal())
print(p)
ggsave(file = paste0(QC_loc, "/",Today,".AEDB.CEA.EventDistributionPer30Days.",eventtime,".pdf"), plot = last_plot())
}[1] "Printing the distribution of: ep_major_t_30days"
[1] "Printing the distribution of: ep_stroke_t_30days"
[1] "Printing the distribution of: ep_coronary_t_30days"
[1] "Printing the distribution of: ep_cvdeath_t_30days"
# Names of the 90-day time-to-event columns and, in matching order, of the
# corresponding 90-day event indicator columns.
times90 <- c(
  "ep_major_t_90days",
  "ep_stroke_t_90days",
  "ep_coronary_t_90days",
  "ep_cvdeath_t_90days"
)
endpoints90 <- c(
  "epmajor.90days",
  "epstroke.90days",
  "epcoronary.90days",
  "epcvdeath.90days"
)
cat("* Check the cases per event type - for sanity.")* Check the cases per event type - for sanity.
for (events in endpoints90){
print(paste0("Printing the summary of: ",events))
# print(summary(AEDB.CEA[,events]))
print(table(AEDB.CEA[,events]))
}[1] "Printing the summary of: epmajor.90days"
0 1
2206 94
[1] "Printing the summary of: epstroke.90days"
0 1
2241 60
[1] "Printing the summary of: epcoronary.90days"
0 1
2257 44
[1] "Printing the summary of: epcvdeath.90days"
0 1
2281 19
cat("* Check distribution of events over time - for sanity.")* Check distribution of events over time - for sanity.
for (eventtimes in times90){
print(paste0("Printing the summary of: ",eventtimes))
print(summary(AEDB.CEA[,eventtimes]))
}[1] "Printing the summary of: ep_major_t_90days"
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00 90.00 90.00 86.75 90.00 90.00 125
[1] "Printing the summary of: ep_stroke_t_90days"
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00 90.00 90.00 87.51 90.00 90.00 125
[1] "Printing the summary of: ep_coronary_t_90days"
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00 90.00 90.00 88.21 90.00 90.00 125
[1] "Printing the summary of: ep_cvdeath_t_90days"
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
1.001 90.000 90.000 89.320 90.000 90.000 125
for (eventtime in times90){
print(paste0("Printing the distribution of: ",eventtime))
p <- gghistogram(AEDB.CEA, x = eventtime, y = "..count..",
main = eventtime, bins = 15,
xlab = "days", color = uithof_color[16], fill = uithof_color[16], ggtheme = theme_minimal())
print(p)
ggsave(file = paste0(QC_loc, "/",Today,".AEDB.CEA.EventDistributionPer90Days.",eventtime,".pdf"), plot = last_plot())
}[1] "Printing the distribution of: ep_major_t_90days"
[1] "Printing the distribution of: ep_stroke_t_90days"
[1] "Printing the distribution of: ep_coronary_t_90days"
[1] "Printing the distribution of: ep_cvdeath_t_90days"
Let’s perform the actual Cox-regressions. We will apply a couple of models:
# MODEL 1 Cox regressions.
# For every endpoint/time pair (`endpoints[i]`, `times[i]`) and every variable
# in TRAITS.PROTEIN.RANK this block:
#   1. splits the variable into two groups with cut2(g = 2),
#   2. draws a Kaplan-Meier plot of the two groups and copies it to a PDF,
#   3. fits a Cox model adjusted for Age, Gender and ORdate_year, plots the
#      stratified fit and copies it to a PDF,
#   4. prints the model summary and appends the row returned by COX.STAT()
#      (not defined in this section) to COX.results.

# Set up a dataframe to receive results
COX.results <- data.frame(matrix(NA, ncol = 12, nrow = 0))
# Looping over each protein/endpoint/time combination
# seq_along() instead of 1:length(): the latter iterates over c(1, 0) when the
# vector is empty.
for (i in seq_along(times)) {
  eptime <- times[i]
  ep <- endpoints[i]
  cat(paste0("* Analyzing the effect of plaque proteins on [",ep,"].\n"))
  cat(" - creating temporary SE for this work.\n")
  # Fresh copy for every endpoint: the protein columns are overwritten with
  # their two-group strata further down.
  TEMP.DF <- as.data.frame(AEDB.CEA)
  cat(" - making a 'Surv' object and adding this to temporary dataframe.\n")
  TEMP.DF$event <- as.integer(TEMP.DF[,ep])
  TEMP.DF$y <- Surv(time = TEMP.DF[,eptime], event = TEMP.DF$event)
  cat(" - making strata of each of the plaque proteins and start survival analysis.\n")
  for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
    # Name of the column analysed in this iteration
    trait <- TRAITS.PROTEIN.RANK[protein]
    cat(paste0(" > processing [",trait,"]; ",protein," out of ",length(TRAITS.PROTEIN.RANK)," proteins.\n"))
    # splitting into two groups
    TEMP.DF[[trait]] <- cut2(TEMP.DF[,trait], g = 2)
    cat(paste0(" > cross tabulation of ",trait,"-stratum.\n"))
    show(table(TEMP.DF[[trait]]))
    cat(paste0("\n > fitting the model for ",trait,"-stratum.\n"))
    fit <- survfit(as.formula(paste0("y ~ ", trait)), data = TEMP.DF)
    cat(paste0("\n > make a Kaplan-Meier-shizzle...\n"))
    # make Kaplan-Meier curve and save it
    show(ggsurvplot(fit, data = TEMP.DF,
                    palette = c("#DB003F", "#1290D9"),
                    # palete = c("F59D10", "#DB003F", "#49A01D", "#1290D9"),
                    linetype = c(1,2),
                    # linetype = c(1,2,3,4),
                    # conf.int = FALSE, conf.int.fill = "#595A5C", conf.int.alpha = 0.1,
                    pval = FALSE, pval.method = FALSE, pval.size = 4,
                    risk.table = TRUE, risk.table.y.text = FALSE, tables.y.text.col = TRUE, fontsize = 4,
                    censor = FALSE,
                    legend = "right",
                    legend.title = paste0("",trait,""),
                    legend.labs = c("low", "high"),
                    title = paste0("Risk of ",ep,""), xlab = "Time [years]", font.main = c(16, "bold", "black")))
    # Copy what is on the current graphics device to a PDF
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.survival.",ep,".2G.",
                               trait,".pdf"), width = 12, height = 10, onefile = FALSE)
    cat(paste0("\n > perform the Cox-regression fashizzle and plot it...\n"))
    ### Do Cox-regression and plot it
    ### MODEL 1 (Simple model)
    # The formula text is left exactly as it was: the printed Call and the
    # coefficient row names are derived from it.
    cox <- coxph(Surv(TEMP.DF[,eptime], event) ~ TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]+Age+Gender + ORdate_year, data = TEMP.DF)
    # Same covariates, but with the protein groups as strata so that
    # survfit() yields one curve per group.
    coxplot <- coxph(Surv(TEMP.DF[,eptime], event) ~ strata(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]])+Age+Gender + ORdate_year, data = TEMP.DF)
    plot(survfit(coxplot), main = paste0("Cox proportional hazard of [",ep,"] per [",eptime,"]."),
         # ylim = c(0.2, 1), xlim = c(0,3), col = c("#595A5C", "#DB003F", "#1290D9"),
         ylim = c(0, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
         lty = c(1,2), lwd = 2,
         ylab = "Survival probability", xlab = "FU time [years]",
         mark.time = FALSE, axes = FALSE, bty = "n")
    legend("topright",
           c("low", "high"),
           title = paste0("",trait,""),
           col = c("#DB003F", "#1290D9"),
           lty = c(1,2), lwd = 2,
           bty = "n")
    # Axes are drawn by hand because plot() was called with axes = FALSE
    axis(side = 1, at = seq(0, 3, by = 1))
    axis(side = 2, at = seq(0, 1, by = 0.2))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.Cox.",ep,".2G.",
                               # Today,".AEDB.CEA.Cox.",ep,".4G.",
                               trait,".MODEL1.pdf"), height = 12, width = 10, onefile = TRUE)
    show(summary(cox))
    cat(paste0("\n > writing the Cox-regression fashizzle to Excel...\n"))
    # One-row holder for this model's statistics, appended to the running table
    COX.results.TEMP <- data.frame(matrix(NA, ncol = 12, nrow = 0))
    COX.results.TEMP[1,] <- COX.STAT(cox, "AEDB.CEA", ep, trait)
    COX.results <- rbind(COX.results, COX.results.TEMP)
  }
}
* Analyzing the effect of plaque proteins on [epmajor.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ml_2015_rank]; 1 out of 2 proteins.
> cross tabulation of MCP1_pg_ml_2015_rank-stratum.
[-3.34125,0.00209) [ 0.00209,3.34125]
600 599
> fitting the model for MCP1_pg_ml_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year, data = TEMP.DF)
n= 1187, number of events= 140
(1236 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 0.01185 1.01192 0.18373 0.064 0.948575
Age 0.03489 1.03550 0.01003 3.478 0.000506 ***
Gendermale 0.35203 1.42196 0.20065 1.754 0.079351 .
ORdate_year -0.02361 0.97667 0.03018 -0.782 0.434149
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 1.0119 0.9882 0.7059 1.451
Age 1.0355 0.9657 1.0153 1.056
Gendermale 1.4220 0.7033 0.9596 2.107
ORdate_year 0.9767 1.0239 0.9206 1.036
Concordance= 0.589 (se = 0.025 )
Likelihood ratio test= 16.08 on 4 df, p=0.003
Wald test = 15.15 on 4 df, p=0.004
Score (logrank) test = 15.23 on 4 df, p=0.004
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ml_2015_rank ' and its association to ' epmajor.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epmajor.3years
Protein...................: MCP1_pg_ml_2015_rank
Effect size...............: 0.01185
Standard error............: 0.18373
Odds ratio (effect size)..: 1.012
Lower 95% CI..............: 0.706
Upper 95% CI..............: 1.451
T-value...................: 0.064496
P-value...................: 0.9485755
Sample size in model......: 1187
Number of events..........: 140
> processing [MCP1_rank]; 2 out of 2 proteins.
> cross tabulation of MCP1_rank-stratum.
[-3.12162,0.00225) [ 0.00225,3.12162]
278 278
> fitting the model for MCP1_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year, data = TEMP.DF)
n= 549, number of events= 70
(1874 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] -0.22427 0.79910 0.24647 -0.910 0.3629
Age 0.02639 1.02674 0.01475 1.789 0.0736 .
Gendermale 0.87183 2.39128 0.34246 2.546 0.0109 *
ORdate_year -0.03519 0.96542 0.11300 -0.311 0.7555
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] 0.7991 1.2514 0.4929 1.295
Age 1.0267 0.9740 0.9975 1.057
Gendermale 2.3913 0.4182 1.2222 4.679
ORdate_year 0.9654 1.0358 0.7736 1.205
Concordance= 0.618 (se = 0.034 )
Likelihood ratio test= 12.21 on 4 df, p=0.02
Wald test = 10.74 on 4 df, p=0.03
Score (logrank) test = 11.16 on 4 df, p=0.02
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_rank ' and its association to ' epmajor.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epmajor.3years
Protein...................: MCP1_rank
Effect size...............: -0.224272
Standard error............: 0.246475
Odds ratio (effect size)..: 0.799
Lower 95% CI..............: 0.493
Upper 95% CI..............: 1.295
T-value...................: -0.909918
P-value...................: 0.3628658
Sample size in model......: 549
Number of events..........: 70
* Analyzing the effect of plaque proteins on [epstroke.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ml_2015_rank]; 1 out of 2 proteins.
> cross tabulation of MCP1_pg_ml_2015_rank-stratum.
[-3.34125,0.00209) [ 0.00209,3.34125]
600 599
> fitting the model for MCP1_pg_ml_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year, data = TEMP.DF)
n= 1187, number of events= 74
(1236 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 0.05159 1.05295 0.25152 0.205 0.83747
Age 0.03709 1.03779 0.01382 2.684 0.00728 **
Gendermale 0.09193 1.09629 0.26020 0.353 0.72385
ORdate_year -0.04704 0.95405 0.04159 -1.131 0.25806
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 1.053 0.9497 0.6432 1.724
Age 1.038 0.9636 1.0101 1.066
Gendermale 1.096 0.9122 0.6583 1.826
ORdate_year 0.954 1.0482 0.8794 1.035
Concordance= 0.591 (se = 0.035 )
Likelihood ratio test= 8.33 on 4 df, p=0.08
Wald test = 7.9 on 4 df, p=0.1
Score (logrank) test = 7.96 on 4 df, p=0.09
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ml_2015_rank ' and its association to ' epstroke.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epstroke.3years
Protein...................: MCP1_pg_ml_2015_rank
Effect size...............: 0.051594
Standard error............: 0.251515
Odds ratio (effect size)..: 1.053
Lower 95% CI..............: 0.643
Upper 95% CI..............: 1.724
T-value...................: 0.205134
P-value...................: 0.8374673
Sample size in model......: 1187
Number of events..........: 74
> processing [MCP1_rank]; 2 out of 2 proteins.
> cross tabulation of MCP1_rank-stratum.
[-3.12162,0.00225) [ 0.00225,3.12162]
278 278
> fitting the model for MCP1_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year, data = TEMP.DF)
n= 549, number of events= 36
(1874 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] -0.366525 0.693139 0.345901 -1.060 0.289
Age 0.007526 1.007554 0.019822 0.380 0.704
Gendermale 0.332937 1.395059 0.403044 0.826 0.409
ORdate_year -0.014799 0.985310 0.157445 -0.094 0.925
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] 0.6931 1.4427 0.3519 1.365
Age 1.0076 0.9925 0.9692 1.047
Gendermale 1.3951 0.7168 0.6332 3.074
ORdate_year 0.9853 1.0149 0.7237 1.341
Concordance= 0.571 (se = 0.043 )
Likelihood ratio test= 1.96 on 4 df, p=0.7
Wald test = 1.92 on 4 df, p=0.8
Score (logrank) test = 1.93 on 4 df, p=0.7
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_rank ' and its association to ' epstroke.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epstroke.3years
Protein...................: MCP1_rank
Effect size...............: -0.366525
Standard error............: 0.345901
Odds ratio (effect size)..: 0.693
Lower 95% CI..............: 0.352
Upper 95% CI..............: 1.365
T-value...................: -1.059623
P-value...................: 0.2893161
Sample size in model......: 549
Number of events..........: 36
* Analyzing the effect of plaque proteins on [epcoronary.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ml_2015_rank]; 1 out of 2 proteins.
> cross tabulation of MCP1_pg_ml_2015_rank-stratum.
[-3.34125,0.00209) [ 0.00209,3.34125]
600 599
> fitting the model for MCP1_pg_ml_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year, data = TEMP.DF)
n= 1187, number of events= 91
(1236 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 0.172342 1.188084 0.225648 0.764 0.4450
Age 0.008689 1.008727 0.012048 0.721 0.4708
Gendermale 0.643664 1.903442 0.270491 2.380 0.0173 *
ORdate_year -0.055903 0.945631 0.037238 -1.501 0.1333
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 1.1881 0.8417 0.7634 1.849
Age 1.0087 0.9913 0.9852 1.033
Gendermale 1.9034 0.5254 1.1202 3.234
ORdate_year 0.9456 1.0575 0.8791 1.017
Concordance= 0.591 (se = 0.031 )
Likelihood ratio test= 9.57 on 4 df, p=0.05
Wald test = 8.73 on 4 df, p=0.07
Score (logrank) test = 8.95 on 4 df, p=0.06
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ml_2015_rank ' and its association to ' epcoronary.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epcoronary.3years
Protein...................: MCP1_pg_ml_2015_rank
Effect size...............: 0.172342
Standard error............: 0.225648
Odds ratio (effect size)..: 1.188
Lower 95% CI..............: 0.763
Upper 95% CI..............: 1.849
T-value...................: 0.763763
P-value...................: 0.4450084
Sample size in model......: 1187
Number of events..........: 91
> processing [MCP1_rank]; 2 out of 2 proteins.
> cross tabulation of MCP1_rank-stratum.
[-3.12162,0.00225) [ 0.00225,3.12162]
278 278
> fitting the model for MCP1_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year, data = TEMP.DF)
n= 549, number of events= 46
(1874 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] 0.24448 1.27695 0.30911 0.791 0.4290
Age 0.03668 1.03736 0.01872 1.959 0.0501 .
Gendermale 0.92420 2.51986 0.43913 2.105 0.0353 *
ORdate_year -0.23892 0.78748 0.13604 -1.756 0.0790 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] 1.2770 0.7831 0.6967 2.340
Age 1.0374 0.9640 1.0000 1.076
Gendermale 2.5199 0.3968 1.0656 5.959
ORdate_year 0.7875 1.2699 0.6032 1.028
Concordance= 0.652 (se = 0.039 )
Likelihood ratio test= 13.98 on 4 df, p=0.007
Wald test = 12.67 on 4 df, p=0.01
Score (logrank) test = 13.17 on 4 df, p=0.01
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_rank ' and its association to ' epcoronary.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epcoronary.3years
Protein...................: MCP1_rank
Effect size...............: 0.244478
Standard error............: 0.309109
Odds ratio (effect size)..: 1.277
Lower 95% CI..............: 0.697
Upper 95% CI..............: 2.34
T-value...................: 0.79091
P-value...................: 0.4289965
Sample size in model......: 549
Number of events..........: 46
* Analyzing the effect of plaque proteins on [epcvdeath.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ml_2015_rank]; 1 out of 2 proteins.
> cross tabulation of MCP1_pg_ml_2015_rank-stratum.
[-3.34125,0.00209) [ 0.00209,3.34125]
600 599
> fitting the model for MCP1_pg_ml_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year, data = TEMP.DF)
n= 1187, number of events= 45
(1236 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] -0.11518 0.89120 0.32234 -0.357 0.7208
Age 0.09047 1.09469 0.02008 4.505 6.63e-06 ***
Gendermale 0.91435 2.49514 0.41402 2.208 0.0272 *
ORdate_year -0.06875 0.93356 0.05424 -1.267 0.2050
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 0.8912 1.1221 0.4738 1.676
Age 1.0947 0.9135 1.0524 1.139
Gendermale 2.4951 0.4008 1.1084 5.617
ORdate_year 0.9336 1.0712 0.8394 1.038
Concordance= 0.716 (se = 0.039 )
Likelihood ratio test= 29.09 on 4 df, p=7e-06
Wald test = 24.68 on 4 df, p=6e-05
Score (logrank) test = 25.41 on 4 df, p=4e-05
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ml_2015_rank ' and its association to ' epcvdeath.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epcvdeath.3years
Protein...................: MCP1_pg_ml_2015_rank
Effect size...............: -0.115182
Standard error............: 0.322341
Odds ratio (effect size)..: 0.891
Lower 95% CI..............: 0.474
Upper 95% CI..............: 1.676
T-value...................: -0.357328
P-value...................: 0.7208462
Sample size in model......: 1187
Number of events..........: 45
> processing [MCP1_rank]; 2 out of 2 proteins.
> cross tabulation of MCP1_rank-stratum.
[-3.12162,0.00225) [ 0.00225,3.12162]
278 278
> fitting the model for MCP1_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year, data = TEMP.DF)
n= 549, number of events= 26
(1874 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] -0.03367 0.96689 0.40417 -0.083 0.9336
Age 0.05571 1.05729 0.02549 2.185 0.0289 *
Gendermale 1.05290 2.86594 0.61477 1.713 0.0868 .
ORdate_year -0.11039 0.89548 0.18082 -0.611 0.5415
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] 0.9669 1.0342 0.4379 2.135
Age 1.0573 0.9458 1.0058 1.111
Gendermale 2.8659 0.3489 0.8590 9.562
ORdate_year 0.8955 1.1167 0.6283 1.276
Concordance= 0.679 (se = 0.06 )
Likelihood ratio test= 9.52 on 4 df, p=0.05
Wald test = 8.25 on 4 df, p=0.08
Score (logrank) test = 8.62 on 4 df, p=0.07
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_rank ' and its association to ' epcvdeath.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epcvdeath.3years
Protein...................: MCP1_rank
Effect size...............: -0.03367
Standard error............: 0.40417
Odds ratio (effect size)..: 0.967
Lower 95% CI..............: 0.438
Upper 95% CI..............: 2.135
T-value...................: -0.083308
P-value...................: 0.9336068
Sample size in model......: 549
Number of events..........: 26
cat("- Edit the column names...\n")
- Edit the column names...
# Label the 12 columns of the results table.
# NOTE(review): the third column presumably holds the protein name passed to
# COX.STAT(); the "CpG" label looks like a leftover -- confirm before renaming,
# since later code may refer to it.
names(COX.results) <- c(
  "Dataset", "Outcome", "CpG",
  "Beta", "s.e.m.",
  "HR", "low95CI", "up95CI",
  "Z-value", "P-value",
  "SampleSize", "N_events"
)
cat("- Correct the variable types...\n")
- Correct the variable types...
# Coerce each statistics column to numeric, one column at a time, then keep a
# copy of the finished table under a dataset-specific name.
for (stat_col in c("Beta", "s.e.m.", "HR", "low95CI", "up95CI",
                   "Z-value", "P-value", "SampleSize", "N_events")) {
  COX.results[[stat_col]] <- as.numeric(COX.results[[stat_col]])
}
AEDB.CEA.COX.results <- COX.results
# Save the data
cat("- Writing results to Excel-file...\n")
- Writing results to Excel-file...
# Export the Model 1 results table to an Excel workbook in OUT_loc; the header
# row gets the bold style created here.
head.style <- createStyle(textDecoration = "BOLD")
write.xlsx(
  AEDB.CEA.COX.results,
  file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Cox.2G.MODEL1.xlsx"),
  sheetName = "Results",
  creator = "Sander W. van der Laan",
  headerStyle = head.style,
  col.names = TRUE,
  row.names = FALSE,
  overwrite = TRUE
)
# Removing intermediates
cat("- Removing intermediate files...\n")
- Removing intermediate files...
#rm(TEMP.DF, protein, fit, cox, coxplot, COX.results, COX.results.TEMP, head.style, AEDB.CEA.COX.results)
#rm(head.style)
# Set up a dataframe to receive results
# MODEL 2 Cox regressions.
# Same procedure as the Model 1 loop (two-group split with cut2(g = 2),
# Kaplan-Meier plot, Cox fit, stratified survival plot, COX.STAT() row appended
# to COX.results), but with a larger covariate set; see the model comment below.
COX.results <- data.frame(matrix(NA, ncol = 12, nrow = 0))
# Looping over each protein/endpoint/time combination
# seq_along() instead of 1:length(): the latter iterates over c(1, 0) when the
# vector is empty.
for (i in seq_along(times)) {
  eptime <- times[i]
  ep <- endpoints[i]
  cat(paste0("* Analyzing the effect of plaque proteins on [",ep,"].\n"))
  cat(" - creating temporary SE for this work.\n")
  # Fresh copy for every endpoint: the protein columns are overwritten with
  # their two-group strata further down.
  TEMP.DF <- as.data.frame(AEDB.CEA)
  cat(" - making a 'Surv' object and adding this to temporary dataframe.\n")
  TEMP.DF$event <- as.integer(TEMP.DF[,ep])
  #as.integer(TEMP.DF[,ep] == "Excluded")
  TEMP.DF$y <- Surv(time = TEMP.DF[,eptime], event = TEMP.DF$event)
  cat(" - making strata of each of the plaque proteins and start survival analysis.\n")
  for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
    # Name of the column analysed in this iteration
    trait <- TRAITS.PROTEIN.RANK[protein]
    cat(paste0(" > processing [",trait,"]; ",protein," out of ",length(TRAITS.PROTEIN.RANK)," proteins.\n"))
    # splitting into two groups
    TEMP.DF[[trait]] <- cut2(TEMP.DF[,trait], g = 2)
    cat(paste0(" > cross tabulation of ",trait,"-stratum.\n"))
    show(table(TEMP.DF[[trait]]))
    cat(paste0("\n > fitting the model for ",trait,"-stratum.\n"))
    fit <- survfit(as.formula(paste0("y ~ ", trait)), data = TEMP.DF)
    cat(paste0("\n > make a Kaplan-Meier-shizzle...\n"))
    # make Kaplan-Meier curve and save it
    show(ggsurvplot(fit, data = TEMP.DF,
                    palette = c("#DB003F", "#1290D9"),
                    # palete = c("F59D10", "#DB003F", "#49A01D", "#1290D9"),
                    linetype = c(1,2),
                    # linetype = c(1,2,3,4),
                    # conf.int = FALSE, conf.int.fill = "#595A5C", conf.int.alpha = 0.1,
                    pval = FALSE, pval.method = FALSE, pval.size = 4,
                    risk.table = TRUE, risk.table.y.text = FALSE, tables.y.text.col = TRUE, fontsize = 4,
                    censor = FALSE,
                    legend = "right",
                    legend.title = paste0("",trait,""),
                    legend.labs = c("low", "high"),
                    title = paste0("Risk of ",ep,""), xlab = "Time [years]", font.main = c(16, "bold", "black")))
    # Copy what is on the current graphics device to a PDF
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.survival.",ep,".2G.",
                               trait,".pdf"), width = 12, height = 10, onefile = FALSE)
    cat(paste0("\n > perform the Cox-regression fashizzle and plot it...\n"))
    ### Do Cox-regression and plot it
    ### MODEL 2 adjusted for Age, Gender, ORdate_year, Hypertension.composite,
    ### DiabetesStatus, SmokerStatus, Med.Statin.LLD, Med.all.antiplatelet,
    ### GFR_MDRD, BMI, MedHx_CVD and stenose.
    # NOTE(review): the earlier description of this model also listed LDL-C
    # levels, but no LDL-C term is present in the formula -- confirm which is
    # intended.
    # The formula text is left exactly as it was: the printed Call and the
    # coefficient row names are derived from it.
    cox <- coxph(Surv(TEMP.DF[,eptime], event) ~ TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]+Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus + SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI + MedHx_CVD + stenose, data = TEMP.DF)
    # Same covariates, but with the protein groups as strata so that
    # survfit() yields one curve per group.
    coxplot <- coxph(Surv(TEMP.DF[,eptime], event) ~ strata(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]])+Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus + SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI + MedHx_CVD + stenose, data = TEMP.DF)
    plot(survfit(coxplot), main = paste0("Cox proportional hazard of [",ep,"] per [",eptime,"]."),
         # ylim = c(0.2, 1), xlim = c(0,3), col = c("#595A5C", "#DB003F", "#1290D9"),
         ylim = c(0, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
         lty = c(1,2), lwd = 2,
         ylab = "Survival probability", xlab = "FU time [years]",
         mark.time = FALSE, axes = FALSE, bty = "n")
    legend("topright",
           c("low", "high"),
           title = paste0("",trait,""),
           col = c("#DB003F", "#1290D9"),
           lty = c(1,2), lwd = 2,
           bty = "n")
    # Axes are drawn by hand because plot() was called with axes = FALSE
    axis(side = 1, at = seq(0, 3, by = 1))
    axis(side = 2, at = seq(0, 1, by = 0.2))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.Cox.",ep,".2G.",
                               # Today,".AEDB.CEA.Cox.",ep,".4G.",
                               trait,".MODEL2.pdf"), height = 12, width = 10, onefile = TRUE)
    show(summary(cox))
    cat(paste0("\n > writing the Cox-regression fashizzle to Excel...\n"))
    # One-row holder for this model's statistics, appended to the running table
    COX.results.TEMP <- data.frame(matrix(NA, ncol = 12, nrow = 0))
    COX.results.TEMP[1,] <- COX.STAT(cox, "AEDB.CEA", ep, trait)
    COX.results <- rbind(COX.results, COX.results.TEMP)
  }
}
* Analyzing the effect of plaque proteins on [epmajor.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ml_2015_rank]; 1 out of 2 proteins.
> cross tabulation of MCP1_pg_ml_2015_rank-stratum.
[-3.34125,0.00209) [ 0.00209,3.34125]
600 599
> fitting the model for MCP1_pg_ml_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = TEMP.DF)
n= 1029, number of events= 115
(1394 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 1.291e-01 1.138e+00 2.002e-01 0.645 0.518966
Age 3.304e-02 1.034e+00 1.293e-02 2.556 0.010591 *
Gendermale 3.709e-01 1.449e+00 2.288e-01 1.621 0.105078
ORdate_year -1.222e-02 9.879e-01 3.470e-02 -0.352 0.724616
Hypertension.compositeno -4.257e-01 6.533e-01 3.572e-01 -1.192 0.233306
Hypertension.compositeyes NA NA 0.000e+00 NA NA
DiabetesStatusDiabetes -1.766e-02 9.825e-01 2.237e-01 -0.079 0.937094
SmokerStatusEx-smoker -5.003e-01 6.063e-01 2.096e-01 -2.387 0.016973 *
SmokerStatusNever smoked -8.121e-01 4.439e-01 3.418e-01 -2.376 0.017500 *
Med.Statin.LLDno 2.512e-01 1.286e+00 2.183e-01 1.151 0.249766
Med.Statin.LLDyes NA NA 0.000e+00 NA NA
Med.all.antiplateletno 4.271e-01 1.533e+00 2.637e-01 1.620 0.105327
Med.all.antiplateletyes NA NA 0.000e+00 NA NA
GFR_MDRD -1.926e-02 9.809e-01 4.962e-03 -3.880 0.000104 ***
BMI 5.407e-02 1.056e+00 2.610e-02 2.071 0.038324 *
MedHx_CVDyes 5.365e-01 1.710e+00 2.221e-01 2.416 0.015694 *
stenose0-49% -1.571e+01 1.504e-07 2.447e+03 -0.006 0.994877
stenose50-70% -8.674e-01 4.200e-01 8.780e-01 -0.988 0.323168
stenose70-90% -3.100e-01 7.334e-01 7.471e-01 -0.415 0.678201
stenose90-99% -2.933e-01 7.458e-01 7.560e-01 -0.388 0.698046
stenose100% (Occlusion) -1.521e-01 8.589e-01 1.253e+00 -0.121 0.903378
stenoseNA NA NA 0.000e+00 NA NA
stenose50-99% -1.531e+01 2.252e-07 2.926e+03 -0.005 0.995826
stenose70-99% NA NA 0.000e+00 NA NA
stenose99 NA NA 0.000e+00 NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 1.138e+00 8.789e-01 0.76852 1.6846
Age 1.034e+00 9.675e-01 1.00773 1.0601
Gendermale 1.449e+00 6.901e-01 0.92531 2.2690
ORdate_year 9.879e-01 1.012e+00 0.92291 1.0574
Hypertension.compositeno 6.533e-01 1.531e+00 0.32438 1.3157
Hypertension.compositeyes NA NA NA NA
DiabetesStatusDiabetes 9.825e-01 1.018e+00 0.63374 1.5232
SmokerStatusEx-smoker 6.063e-01 1.649e+00 0.40210 0.9143
SmokerStatusNever smoked 4.439e-01 2.253e+00 0.22718 0.8674
Med.Statin.LLDno 1.286e+00 7.779e-01 0.83813 1.9719
Med.Statin.LLDyes NA NA NA NA
Med.all.antiplateletno 1.533e+00 6.524e-01 0.91414 2.5702
Med.all.antiplateletyes NA NA NA NA
GFR_MDRD 9.809e-01 1.019e+00 0.97143 0.9905
BMI 1.056e+00 9.474e-01 1.00291 1.1110
MedHx_CVDyes 1.710e+00 5.848e-01 1.10656 2.6424
stenose0-49% 1.504e-07 6.648e+06 0.00000 Inf
stenose50-70% 4.200e-01 2.381e+00 0.07515 2.3477
stenose70-90% 7.334e-01 1.363e+00 0.16959 3.1720
stenose90-99% 7.458e-01 1.341e+00 0.16947 3.2821
stenose100% (Occlusion) 8.589e-01 1.164e+00 0.07373 10.0065
stenoseNA NA NA NA NA
stenose50-99% 2.252e-07 4.441e+06 0.00000 Inf
stenose70-99% NA NA NA NA
stenose99 NA NA NA NA
Concordance= 0.698 (se = 0.023 )
Likelihood ratio test= 63.82 on 19 df, p=9e-07
Wald test = 58.85 on 19 df, p=6e-06
Score (logrank) test = 62.26 on 19 df, p=2e-06
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ml_2015_rank ' and its association to ' epmajor.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epmajor.3years
Protein...................: MCP1_pg_ml_2015_rank
Effect size...............: 0.129126
Standard error............: 0.200214
Odds ratio (effect size)..: 1.138
Lower 95% CI..............: 0.769
Upper 95% CI..............: 1.685
T-value...................: 0.64494
P-value...................: 0.5189661
Sample size in model......: 1029
Number of events..........: 115
> processing [MCP1_rank]; 2 out of 2 proteins.
> cross tabulation of MCP1_rank-stratum.
[-3.12162,0.00225) [ 0.00225,3.12162]
278 278
> fitting the model for MCP1_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = TEMP.DF)
n= 493, number of events= 61
(1930 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] -3.920e-01 6.757e-01 2.685e-01 -1.460 0.1443
Age 3.117e-02 1.032e+00 1.813e-02 1.719 0.0857 .
Gendermale 8.127e-01 2.254e+00 3.652e-01 2.226 0.0260 *
ORdate_year -3.205e-02 9.685e-01 1.249e-01 -0.257 0.7975
Hypertension.compositeno -7.542e-01 4.704e-01 5.309e-01 -1.421 0.1555
Hypertension.compositeyes NA NA 0.000e+00 NA NA
DiabetesStatusDiabetes 6.309e-01 1.879e+00 2.951e-01 2.138 0.0325 *
SmokerStatusEx-smoker -6.391e-01 5.278e-01 2.880e-01 -2.219 0.0265 *
SmokerStatusNever smoked -3.224e-01 7.244e-01 4.307e-01 -0.748 0.4542
Med.Statin.LLDno 2.275e-01 1.255e+00 2.962e-01 0.768 0.4426
Med.Statin.LLDyes NA NA 0.000e+00 NA NA
Med.all.antiplateletno -1.296e-02 9.871e-01 4.530e-01 -0.029 0.9772
Med.all.antiplateletyes NA NA 0.000e+00 NA NA
GFR_MDRD -1.091e-02 9.891e-01 6.796e-03 -1.606 0.1083
BMI 5.177e-03 1.005e+00 3.447e-02 0.150 0.8806
MedHx_CVDyes 5.574e-01 1.746e+00 3.075e-01 1.813 0.0699 .
stenose0-49% -1.651e+01 6.744e-08 3.444e+03 -0.005 0.9962
stenose50-70% -1.690e+00 1.845e-01 1.448e+00 -1.167 0.2431
stenose70-90% -7.523e-01 4.713e-01 1.049e+00 -0.717 0.4731
stenose90-99% -1.050e+00 3.501e-01 1.055e+00 -0.995 0.3198
stenose100% (Occlusion) NA NA 0.000e+00 NA NA
stenoseNA NA NA 0.000e+00 NA NA
stenose50-99% NA NA 0.000e+00 NA NA
stenose70-99% NA NA 0.000e+00 NA NA
stenose99 NA NA 0.000e+00 NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] 6.757e-01 1.480e+00 0.39920 1.144
Age 1.032e+00 9.693e-01 0.99563 1.069
Gendermale 2.254e+00 4.436e-01 1.10189 4.611
ORdate_year 9.685e-01 1.033e+00 0.75814 1.237
Hypertension.compositeno 4.704e-01 2.126e+00 0.16617 1.332
Hypertension.compositeyes NA NA NA NA
DiabetesStatusDiabetes 1.879e+00 5.321e-01 1.05394 3.351
SmokerStatusEx-smoker 5.278e-01 1.895e+00 0.30015 0.928
SmokerStatusNever smoked 7.244e-01 1.380e+00 0.31142 1.685
Med.Statin.LLDno 1.255e+00 7.965e-01 0.70247 2.244
Med.Statin.LLDyes NA NA NA NA
Med.all.antiplateletno 9.871e-01 1.013e+00 0.40623 2.399
Med.all.antiplateletyes NA NA NA NA
GFR_MDRD 9.891e-01 1.011e+00 0.97606 1.002
BMI 1.005e+00 9.948e-01 0.93952 1.075
MedHx_CVDyes 1.746e+00 5.727e-01 0.95573 3.190
stenose0-49% 6.744e-08 1.483e+07 0.00000 Inf
stenose50-70% 1.845e-01 5.420e+00 0.01080 3.151
stenose70-90% 4.713e-01 2.122e+00 0.06036 3.679
stenose90-99% 3.501e-01 2.857e+00 0.04426 2.768
stenose100% (Occlusion) NA NA NA NA
stenoseNA NA NA NA NA
stenose50-99% NA NA NA NA
stenose70-99% NA NA NA NA
stenose99 NA NA NA NA
Concordance= 0.703 (se = 0.03 )
Likelihood ratio test= 32.87 on 17 df, p=0.01
Wald test = 29.3 on 17 df, p=0.03
Score (logrank) test = 31.16 on 17 df, p=0.02
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_rank ' and its association to ' epmajor.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epmajor.3years
Protein...................: MCP1_rank
Effect size...............: -0.392011
Standard error............: 0.268514
Odds ratio (effect size)..: 0.676
Lower 95% CI..............: 0.399
Upper 95% CI..............: 1.144
T-value...................: -1.459925
P-value...................: 0.1443106
Sample size in model......: 493
Number of events..........: 61
* Analyzing the effect of plaque proteins on [epstroke.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ml_2015_rank]; 1 out of 2 proteins.
> cross tabulation of MCP1_pg_ml_2015_rank-stratum.
[-3.34125,0.00209) [ 0.00209,3.34125]
600 599
> fitting the model for MCP1_pg_ml_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = TEMP.DF)
n= 1029, number of events= 59
(1394 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 1.594e-01 1.173e+00 2.824e-01 0.564 0.5724
Age 4.416e-02 1.045e+00 1.793e-02 2.462 0.0138 *
Gendermale -4.998e-02 9.513e-01 3.010e-01 -0.166 0.8681
ORdate_year -3.475e-02 9.658e-01 4.903e-02 -0.709 0.4785
Hypertension.compositeno -1.230e-03 9.988e-01 4.192e-01 -0.003 0.9977
Hypertension.compositeyes NA NA 0.000e+00 NA NA
DiabetesStatusDiabetes -2.173e-02 9.785e-01 3.172e-01 -0.068 0.9454
SmokerStatusEx-smoker -1.136e-01 8.926e-01 2.965e-01 -0.383 0.7015
SmokerStatusNever smoked -9.518e-01 3.860e-01 5.240e-01 -1.817 0.0693 .
Med.Statin.LLDno 3.482e-01 1.417e+00 2.971e-01 1.172 0.2412
Med.Statin.LLDyes NA NA 0.000e+00 NA NA
Med.all.antiplateletno 3.779e-01 1.459e+00 3.721e-01 1.016 0.3098
Med.all.antiplateletyes NA NA 0.000e+00 NA NA
GFR_MDRD -3.561e-03 9.964e-01 7.027e-03 -0.507 0.6124
BMI 8.022e-02 1.084e+00 3.436e-02 2.335 0.0195 *
MedHx_CVDyes 3.650e-01 1.441e+00 2.941e-01 1.241 0.2146
stenose0-49% -1.549e+01 1.867e-07 3.367e+03 -0.005 0.9963
stenose50-70% -6.477e-01 5.233e-01 1.173e+00 -0.552 0.5810
stenose70-90% -4.535e-01 6.354e-01 1.055e+00 -0.430 0.6673
stenose90-99% -5.009e-01 6.060e-01 1.070e+00 -0.468 0.6396
stenose100% (Occlusion) 3.518e-01 1.422e+00 1.459e+00 0.241 0.8094
stenoseNA NA NA 0.000e+00 NA NA
stenose50-99% -1.518e+01 2.547e-07 3.975e+03 -0.004 0.9970
stenose70-99% NA NA 0.000e+00 NA NA
stenose99 NA NA 0.000e+00 NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 1.173e+00 8.526e-01 0.67425 2.040
Age 1.045e+00 9.568e-01 1.00905 1.083
Gendermale 9.513e-01 1.051e+00 0.52730 1.716
ORdate_year 9.658e-01 1.035e+00 0.87735 1.063
Hypertension.compositeno 9.988e-01 1.001e+00 0.43921 2.271
Hypertension.compositeyes NA NA NA NA
DiabetesStatusDiabetes 9.785e-01 1.022e+00 0.52549 1.822
SmokerStatusEx-smoker 8.926e-01 1.120e+00 0.49923 1.596
SmokerStatusNever smoked 3.860e-01 2.590e+00 0.13823 1.078
Med.Statin.LLDno 1.417e+00 7.059e-01 0.79124 2.536
Med.Statin.LLDyes NA NA NA NA
Med.all.antiplateletno 1.459e+00 6.853e-01 0.70369 3.026
Med.all.antiplateletyes NA NA NA NA
GFR_MDRD 9.964e-01 1.004e+00 0.98282 1.010
BMI 1.084e+00 9.229e-01 1.01297 1.159
MedHx_CVDyes 1.441e+00 6.942e-01 0.80943 2.564
stenose0-49% 1.867e-07 5.356e+06 0.00000 Inf
stenose50-70% 5.233e-01 1.911e+00 0.05246 5.219
stenose70-90% 6.354e-01 1.574e+00 0.08037 5.024
stenose90-99% 6.060e-01 1.650e+00 0.07448 4.930
stenose100% (Occlusion) 1.422e+00 7.034e-01 0.08147 24.808
stenoseNA NA NA NA NA
stenose50-99% 2.547e-07 3.926e+06 0.00000 Inf
stenose70-99% NA NA NA NA
stenose99 NA NA NA NA
Concordance= 0.672 (se = 0.034 )
Likelihood ratio test= 23.18 on 19 df, p=0.2
Wald test = 21.31 on 19 df, p=0.3
Score (logrank) test = 22.41 on 19 df, p=0.3
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ml_2015_rank ' and its association to ' epstroke.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epstroke.3years
Protein...................: MCP1_pg_ml_2015_rank
Effect size...............: 0.159428
Standard error............: 0.282444
Odds ratio (effect size)..: 1.173
Lower 95% CI..............: 0.674
Upper 95% CI..............: 2.04
T-value...................: 0.564459
P-value...................: 0.5724415
Sample size in model......: 1029
Number of events..........: 59
> processing [MCP1_rank]; 2 out of 2 proteins.
> cross tabulation of MCP1_rank-stratum.
[-3.12162,0.00225) [ 0.00225,3.12162]
278 278
> fitting the model for MCP1_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = TEMP.DF)
n= 493, number of events= 29
(1930 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] -5.111e-01 5.998e-01 3.922e-01 -1.303 0.192
Age 1.682e-02 1.017e+00 2.487e-02 0.676 0.499
Gendermale 7.422e-02 1.077e+00 4.374e-01 0.170 0.865
ORdate_year -2.613e-02 9.742e-01 1.808e-01 -0.145 0.885
Hypertension.compositeno -8.185e-01 4.411e-01 7.527e-01 -1.087 0.277
Hypertension.compositeyes NA NA 0.000e+00 NA NA
DiabetesStatusDiabetes 2.130e-01 1.237e+00 4.580e-01 0.465 0.642
SmokerStatusEx-smoker -5.374e-01 5.843e-01 4.196e-01 -1.281 0.200
SmokerStatusNever smoked -3.304e-01 7.186e-01 6.147e-01 -0.537 0.591
Med.Statin.LLDno -6.489e-02 9.372e-01 4.561e-01 -0.142 0.887
Med.Statin.LLDyes NA NA 0.000e+00 NA NA
Med.all.antiplateletno -8.493e-02 9.186e-01 6.709e-01 -0.127 0.899
Med.all.antiplateletyes NA NA 0.000e+00 NA NA
GFR_MDRD 2.245e-03 1.002e+00 1.037e-02 0.216 0.829
BMI -3.808e-03 9.962e-01 4.917e-02 -0.077 0.938
MedHx_CVDyes 2.618e-01 1.299e+00 4.162e-01 0.629 0.529
stenose0-49% -1.873e+01 7.320e-09 1.321e+04 -0.001 0.999
stenose50-70% -1.857e+01 8.632e-09 5.072e+03 -0.004 0.997
stenose70-90% -1.307e+00 2.705e-01 1.121e+00 -1.166 0.244
stenose90-99% -1.546e+00 2.130e-01 1.135e+00 -1.362 0.173
stenose100% (Occlusion) NA NA 0.000e+00 NA NA
stenoseNA NA NA 0.000e+00 NA NA
stenose50-99% NA NA 0.000e+00 NA NA
stenose70-99% NA NA 0.000e+00 NA NA
stenose99 NA NA 0.000e+00 NA NA
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] 5.998e-01 1.667e+00 0.27811 1.294
Age 1.017e+00 9.833e-01 0.96858 1.068
Gendermale 1.077e+00 9.285e-01 0.45700 2.538
ORdate_year 9.742e-01 1.026e+00 0.68351 1.389
Hypertension.compositeno 4.411e-01 2.267e+00 0.10088 1.929
Hypertension.compositeyes NA NA NA NA
DiabetesStatusDiabetes 1.237e+00 8.081e-01 0.50429 3.036
SmokerStatusEx-smoker 5.843e-01 1.712e+00 0.25672 1.330
SmokerStatusNever smoked 7.186e-01 1.392e+00 0.21540 2.398
Med.Statin.LLDno 9.372e-01 1.067e+00 0.38338 2.291
Med.Statin.LLDyes NA NA NA NA
Med.all.antiplateletno 9.186e-01 1.089e+00 0.24662 3.421
Med.all.antiplateletyes NA NA NA NA
GFR_MDRD 1.002e+00 9.978e-01 0.98209 1.023
BMI 9.962e-01 1.004e+00 0.90468 1.097
MedHx_CVDyes 1.299e+00 7.697e-01 0.57466 2.938
stenose0-49% 7.320e-09 1.366e+08 0.00000 Inf
stenose50-70% 8.632e-09 1.159e+08 0.00000 Inf
stenose70-90% 2.705e-01 3.697e+00 0.03004 2.436
stenose90-99% 2.130e-01 4.694e+00 0.02303 1.970
stenose100% (Occlusion) NA NA NA NA
stenoseNA NA NA NA NA
stenose50-99% NA NA NA NA
stenose70-99% NA NA NA NA
stenose99 NA NA NA NA
Concordance= 0.651 (se = 0.051 )
Likelihood ratio test= 9.82 on 17 df, p=0.9
Wald test = 7.29 on 17 df, p=1
Score (logrank) test = 9.15 on 17 df, p=0.9
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_rank ' and its association to ' epstroke.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epstroke.3years
Protein...................: MCP1_rank
Effect size...............: -0.511114
Standard error............: 0.392158
Odds ratio (effect size)..: 0.6
Lower 95% CI..............: 0.278
Upper 95% CI..............: 1.294
T-value...................: -1.303338
P-value...................: 0.1924592
Sample size in model......: 493
Number of events..........: 29
* Analyzing the effect of plaque proteins on [epcoronary.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ml_2015_rank]; 1 out of 2 proteins.
> cross tabulation of MCP1_pg_ml_2015_rank-stratum.
[-3.34125,0.00209) [ 0.00209,3.34125]
600 599
> fitting the model for MCP1_pg_ml_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = TEMP.DF)
n= 1029, number of events= 78
(1394 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 2.905e-01 1.337e+00 2.410e-01 1.205 0.228034
Age 3.225e-04 1.000e+00 1.532e-02 0.021 0.983210
Gendermale 8.268e-01 2.286e+00 3.041e-01 2.719 0.006547 **
ORdate_year -4.520e-02 9.558e-01 4.217e-02 -1.072 0.283803
Hypertension.compositeno -9.704e-01 3.789e-01 5.215e-01 -1.861 0.062793 .
Hypertension.compositeyes NA NA 0.000e+00 NA NA
DiabetesStatusDiabetes -8.006e-02 9.231e-01 2.760e-01 -0.290 0.771732
SmokerStatusEx-smoker -6.194e-01 5.383e-01 2.585e-01 -2.396 0.016592 *
SmokerStatusNever smoked -2.735e-01 7.607e-01 3.654e-01 -0.748 0.454249
Med.Statin.LLDno 5.528e-02 1.057e+00 2.760e-01 0.200 0.841276
Med.Statin.LLDyes NA NA 0.000e+00 NA NA
Med.all.antiplateletno 3.320e-01 1.394e+00 3.353e-01 0.990 0.322098
Med.all.antiplateletyes NA NA 0.000e+00 NA NA
GFR_MDRD -2.003e-02 9.802e-01 5.954e-03 -3.365 0.000766 ***
BMI 1.495e-02 1.015e+00 3.351e-02 0.446 0.655552
MedHx_CVDyes 6.941e-01 2.002e+00 2.795e-01 2.483 0.013015 *
stenose0-49% -1.602e+01 1.106e-07 3.018e+03 -0.005 0.995765
stenose50-70% -1.801e+00 1.651e-01 1.427e+00 -1.262 0.206807
stenose70-90% -2.542e-01 7.756e-01 1.043e+00 -0.244 0.807516
stenose90-99% -3.387e-01 7.127e-01 1.054e+00 -0.321 0.747924
stenose100% (Occlusion) -1.545e+01 1.953e-07 2.480e+03 -0.006 0.995030
stenoseNA NA NA 0.000e+00 NA NA
stenose50-99% 7.799e-01 2.181e+00 1.430e+00 0.545 0.585612
stenose70-99% NA NA 0.000e+00 NA NA
stenose99 NA NA 0.000e+00 NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 1.337e+00 7.479e-01 0.83375 2.1442
Age 1.000e+00 9.997e-01 0.97072 1.0308
Gendermale 2.286e+00 4.374e-01 1.25965 4.1490
ORdate_year 9.558e-01 1.046e+00 0.87999 1.0382
Hypertension.compositeno 3.789e-01 2.639e+00 0.13634 1.0532
Hypertension.compositeyes NA NA NA NA
DiabetesStatusDiabetes 9.231e-01 1.083e+00 0.53742 1.5854
SmokerStatusEx-smoker 5.383e-01 1.858e+00 0.32429 0.8935
SmokerStatusNever smoked 7.607e-01 1.315e+00 0.37170 1.5570
Med.Statin.LLDno 1.057e+00 9.462e-01 0.61526 1.8153
Med.Statin.LLDyes NA NA NA NA
Med.all.antiplateletno 1.394e+00 7.175e-01 0.72237 2.6894
Med.all.antiplateletyes NA NA NA NA
GFR_MDRD 9.802e-01 1.020e+00 0.96879 0.9917
BMI 1.015e+00 9.852e-01 0.95054 1.0840
MedHx_CVDyes 2.002e+00 4.995e-01 1.15752 3.4620
stenose0-49% 1.106e-07 9.039e+06 0.00000 Inf
stenose50-70% 1.651e-01 6.056e+00 0.01008 2.7052
stenose70-90% 7.756e-01 1.289e+00 0.10037 5.9929
stenose90-99% 7.127e-01 1.403e+00 0.09030 5.6244
stenose100% (Occlusion) 1.953e-07 5.121e+06 0.00000 Inf
stenoseNA NA NA NA NA
stenose50-99% 2.181e+00 4.584e-01 0.13215 36.0042
stenose70-99% NA NA NA NA
stenose99 NA NA NA NA
Concordance= 0.733 (se = 0.028 )
Likelihood ratio test= 51.75 on 19 df, p=7e-05
Wald test = 45.64 on 19 df, p=6e-04
Score (logrank) test = 49.2 on 19 df, p=2e-04
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ml_2015_rank ' and its association to ' epcoronary.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epcoronary.3years
Protein...................: MCP1_pg_ml_2015_rank
Effect size...............: 0.290473
Standard error............: 0.240969
Odds ratio (effect size)..: 1.337
Lower 95% CI..............: 0.834
Upper 95% CI..............: 2.144
T-value...................: 1.205438
P-value...................: 0.2280341
Sample size in model......: 1029
Number of events..........: 78
> processing [MCP1_rank]; 2 out of 2 proteins.
> cross tabulation of MCP1_rank-stratum.
[-3.12162,0.00225) [ 0.00225,3.12162]
278 278
> fitting the model for MCP1_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = TEMP.DF)
n= 493, number of events= 42
(1930 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] 3.996e-02 1.041e+00 3.269e-01 0.122 0.9027
Age 4.142e-02 1.042e+00 2.293e-02 1.806 0.0709 .
Gendermale 9.841e-01 2.675e+00 4.684e-01 2.101 0.0356 *
ORdate_year -2.760e-01 7.588e-01 1.478e-01 -1.867 0.0620 .
Hypertension.compositeno -2.340e-01 7.914e-01 5.431e-01 -0.431 0.6666
Hypertension.compositeyes NA NA 0.000e+00 NA NA
DiabetesStatusDiabetes 5.455e-01 1.726e+00 3.576e-01 1.526 0.1271
SmokerStatusEx-smoker -4.047e-01 6.672e-01 3.492e-01 -1.159 0.2465
SmokerStatusNever smoked -1.727e-02 9.829e-01 5.071e-01 -0.034 0.9728
Med.Statin.LLDno -5.702e-02 9.446e-01 3.626e-01 -0.157 0.8751
Med.Statin.LLDyes NA NA 0.000e+00 NA NA
Med.all.antiplateletno 6.030e-01 1.828e+00 4.594e-01 1.313 0.1893
Med.all.antiplateletyes NA NA 0.000e+00 NA NA
GFR_MDRD -1.491e-02 9.852e-01 8.623e-03 -1.730 0.0837 .
BMI 4.034e-02 1.041e+00 4.216e-02 0.957 0.3387
MedHx_CVDyes 1.532e-01 1.166e+00 3.489e-01 0.439 0.6605
stenose0-49% -1.499e-01 8.608e-01 8.796e+03 0.000 1.0000
stenose50-70% 1.616e+01 1.041e+07 5.185e+03 0.003 0.9975
stenose70-90% 1.636e+01 1.269e+07 5.185e+03 0.003 0.9975
stenose90-99% 1.621e+01 1.100e+07 5.185e+03 0.003 0.9975
stenose100% (Occlusion) NA NA 0.000e+00 NA NA
stenoseNA NA NA 0.000e+00 NA NA
stenose50-99% NA NA 0.000e+00 NA NA
stenose70-99% NA NA 0.000e+00 NA NA
stenose99 NA NA 0.000e+00 NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] 1.041e+00 9.608e-01 0.5484 1.975
Age 1.042e+00 9.594e-01 0.9965 1.090
Gendermale 2.675e+00 3.738e-01 1.0683 6.700
ORdate_year 7.588e-01 1.318e+00 0.5679 1.014
Hypertension.compositeno 7.914e-01 1.264e+00 0.2729 2.295
Hypertension.compositeyes NA NA NA NA
DiabetesStatusDiabetes 1.726e+00 5.795e-01 0.8562 3.478
SmokerStatusEx-smoker 6.672e-01 1.499e+00 0.3365 1.323
SmokerStatusNever smoked 9.829e-01 1.017e+00 0.3638 2.655
Med.Statin.LLDno 9.446e-01 1.059e+00 0.4641 1.923
Med.Statin.LLDyes NA NA NA NA
Med.all.antiplateletno 1.828e+00 5.472e-01 0.7427 4.497
Med.all.antiplateletyes NA NA NA NA
GFR_MDRD 9.852e-01 1.015e+00 0.9687 1.002
BMI 1.041e+00 9.605e-01 0.9586 1.131
MedHx_CVDyes 1.166e+00 8.580e-01 0.5883 2.309
stenose0-49% 8.608e-01 1.162e+00 0.0000 Inf
stenose50-70% 1.041e+07 9.610e-08 0.0000 Inf
stenose70-90% 1.269e+07 7.881e-08 0.0000 Inf
stenose90-99% 1.100e+07 9.094e-08 0.0000 Inf
stenose100% (Occlusion) NA NA NA NA
stenoseNA NA NA NA NA
stenose50-99% NA NA NA NA
stenose70-99% NA NA NA NA
stenose99 NA NA NA NA
Concordance= 0.725 (se = 0.036 )
Likelihood ratio test= 25.04 on 17 df, p=0.09
Wald test = 15.97 on 17 df, p=0.5
Score (logrank) test = 24.24 on 17 df, p=0.1
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_rank ' and its association to ' epcoronary.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epcoronary.3years
Protein...................: MCP1_rank
Effect size...............: 0.039956
Standard error............: 0.326862
Odds ratio (effect size)..: 1.041
Lower 95% CI..............: 0.548
Upper 95% CI..............: 1.975
T-value...................: 0.122242
P-value...................: 0.9027075
Sample size in model......: 493
Number of events..........: 42
* Analyzing the effect of plaque proteins on [epcvdeath.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ml_2015_rank]; 1 out of 2 proteins.
> cross tabulation of MCP1_pg_ml_2015_rank-stratum.
[-3.34125,0.00209) [ 0.00209,3.34125]
600 599
> fitting the model for MCP1_pg_ml_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = TEMP.DF)
n= 1029, number of events= 33
(1394 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 3.757e-02 1.038e+00 3.794e-01 0.099 0.921123
Age 7.047e-02 1.073e+00 2.723e-02 2.588 0.009658 **
Gendermale 1.226e+00 3.407e+00 5.594e-01 2.191 0.028427 *
ORdate_year -7.706e-02 9.258e-01 7.153e-02 -1.077 0.281331
Hypertension.compositeno -1.773e+01 2.000e-08 3.957e+03 -0.004 0.996425
Hypertension.compositeyes NA NA 0.000e+00 NA NA
DiabetesStatusDiabetes -9.565e-03 9.905e-01 4.279e-01 -0.022 0.982165
SmokerStatusEx-smoker -5.440e-01 5.804e-01 4.052e-01 -1.342 0.179449
SmokerStatusNever smoked -3.778e-01 6.854e-01 6.197e-01 -0.610 0.542134
Med.Statin.LLDno 1.675e-02 1.017e+00 4.225e-01 0.040 0.968375
Med.Statin.LLDyes NA NA 0.000e+00 NA NA
Med.all.antiplateletno 1.115e+00 3.050e+00 4.178e-01 2.669 0.007602 **
Med.all.antiplateletyes NA NA 0.000e+00 NA NA
GFR_MDRD -3.284e-02 9.677e-01 9.422e-03 -3.485 0.000491 ***
BMI 8.583e-02 1.090e+00 5.240e-02 1.638 0.101466
MedHx_CVDyes 7.410e-01 2.098e+00 4.621e-01 1.603 0.108837
stenose0-49% -2.059e+01 1.144e-09 2.687e+04 -0.001 0.999389
stenose50-70% -1.271e+00 2.805e-01 1.263e+00 -1.007 0.314004
stenose70-90% -1.782e+00 1.683e-01 1.122e+00 -1.587 0.112409
stenose90-99% -1.497e+00 2.239e-01 1.150e+00 -1.301 0.193259
stenose100% (Occlusion) -1.989e+01 2.301e-09 1.983e+04 -0.001 0.999200
stenoseNA NA NA 0.000e+00 NA NA
stenose50-99% -1.943e+01 3.629e-09 3.412e+04 -0.001 0.999546
stenose70-99% NA NA 0.000e+00 NA NA
stenose99 NA NA 0.000e+00 NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00209,3.34125] 1.038e+00 9.631e-01 0.49356 2.1842
Age 1.073e+00 9.320e-01 1.01724 1.1318
Gendermale 3.407e+00 2.935e-01 1.13818 10.1980
ORdate_year 9.258e-01 1.080e+00 0.80473 1.0652
Hypertension.compositeno 2.000e-08 5.000e+07 0.00000 Inf
Hypertension.compositeyes NA NA NA NA
DiabetesStatusDiabetes 9.905e-01 1.010e+00 0.42820 2.2911
SmokerStatusEx-smoker 5.804e-01 1.723e+00 0.26230 1.2843
SmokerStatusNever smoked 6.854e-01 1.459e+00 0.20343 2.3092
Med.Statin.LLDno 1.017e+00 9.834e-01 0.44427 2.3276
Med.Statin.LLDyes NA NA NA NA
Med.all.antiplateletno 3.050e+00 3.278e-01 1.34493 6.9181
Med.all.antiplateletyes NA NA NA NA
GFR_MDRD 9.677e-01 1.033e+00 0.94999 0.9857
BMI 1.090e+00 9.178e-01 0.98326 1.2075
MedHx_CVDyes 2.098e+00 4.766e-01 0.84810 5.1899
stenose0-49% 1.144e-09 8.743e+08 0.00000 Inf
stenose50-70% 2.805e-01 3.566e+00 0.02361 3.3316
stenose70-90% 1.683e-01 5.941e+00 0.01865 1.5191
stenose90-99% 2.239e-01 4.466e+00 0.02349 2.1340
stenose100% (Occlusion) 2.301e-09 4.346e+08 0.00000 Inf
stenoseNA NA NA NA NA
stenose50-99% 3.629e-09 2.755e+08 0.00000 Inf
stenose70-99% NA NA NA NA
stenose99 NA NA NA NA
Concordance= 0.844 (se = 0.031 )
Likelihood ratio test= 61.1 on 19 df, p=3e-06
Wald test = 21.88 on 19 df, p=0.3
Score (logrank) test = 57.18 on 19 df, p=1e-05
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ml_2015_rank ' and its association to ' epcvdeath.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epcvdeath.3years
Protein...................: MCP1_pg_ml_2015_rank
Effect size...............: 0.037572
Standard error............: 0.37944
Odds ratio (effect size)..: 1.038
Lower 95% CI..............: 0.494
Upper 95% CI..............: 2.184
T-value...................: 0.099019
P-value...................: 0.921123
Sample size in model......: 1029
Number of events..........: 33
> processing [MCP1_rank]; 2 out of 2 proteins.
> cross tabulation of MCP1_rank-stratum.
[-3.12162,0.00225) [ 0.00225,3.12162]
278 278
> fitting the model for MCP1_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = TEMP.DF)
n= 493, number of events= 23
(1930 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] -2.418e-01 7.852e-01 4.414e-01 -0.548 0.5839
Age 5.035e-02 1.052e+00 3.206e-02 1.571 0.1163
Gendermale 1.075e+00 2.930e+00 6.727e-01 1.598 0.1100
ORdate_year -1.134e-01 8.928e-01 1.951e-01 -0.581 0.5610
Hypertension.compositeno -1.802e+01 1.487e-08 4.552e+03 -0.004 0.9968
Hypertension.compositeyes NA NA 0.000e+00 NA NA
DiabetesStatusDiabetes 5.064e-01 1.659e+00 5.312e-01 0.953 0.3405
SmokerStatusEx-smoker -6.006e-01 5.485e-01 4.733e-01 -1.269 0.2045
SmokerStatusNever smoked -1.010e-01 9.039e-01 7.296e-01 -0.138 0.8899
Med.Statin.LLDno 7.451e-01 2.107e+00 4.587e-01 1.625 0.1043
Med.Statin.LLDyes NA NA 0.000e+00 NA NA
Med.all.antiplateletno 5.400e-01 1.716e+00 6.750e-01 0.800 0.4237
Med.all.antiplateletyes NA NA 0.000e+00 NA NA
GFR_MDRD -2.044e-02 9.798e-01 1.039e-02 -1.968 0.0491 *
BMI 2.176e-02 1.022e+00 5.934e-02 0.367 0.7139
MedHx_CVDyes 1.312e+00 3.713e+00 6.416e-01 2.044 0.0409 *
stenose0-49% -7.481e-01 4.733e-01 3.720e+04 0.000 1.0000
stenose50-70% 4.915e-01 1.635e+00 2.257e+04 0.000 1.0000
stenose70-90% 1.839e+01 9.706e+07 2.097e+04 0.001 0.9993
stenose90-99% 1.809e+01 7.220e+07 2.097e+04 0.001 0.9993
stenose100% (Occlusion) NA NA 0.000e+00 NA NA
stenoseNA NA NA 0.000e+00 NA NA
stenose50-99% NA NA 0.000e+00 NA NA
stenose70-99% NA NA 0.000e+00 NA NA
stenose99 NA NA 0.000e+00 NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00225,3.12162] 7.852e-01 1.274e+00 0.3306 1.8651
Age 1.052e+00 9.509e-01 0.9876 1.1199
Gendermale 2.930e+00 3.413e-01 0.7839 10.9510
ORdate_year 8.928e-01 1.120e+00 0.6091 1.3086
Hypertension.compositeno 1.487e-08 6.727e+07 0.0000 Inf
Hypertension.compositeyes NA NA NA NA
DiabetesStatusDiabetes 1.659e+00 6.027e-01 0.5858 4.7001
SmokerStatusEx-smoker 5.485e-01 1.823e+00 0.2169 1.3869
SmokerStatusNever smoked 9.039e-01 1.106e+00 0.2163 3.7768
Med.Statin.LLDno 2.107e+00 4.747e-01 0.8574 5.1763
Med.Statin.LLDyes NA NA NA NA
Med.all.antiplateletno 1.716e+00 5.827e-01 0.4571 6.4433
Med.all.antiplateletyes NA NA NA NA
GFR_MDRD 9.798e-01 1.021e+00 0.9600 0.9999
BMI 1.022e+00 9.785e-01 0.9098 1.1480
MedHx_CVDyes 3.713e+00 2.693e-01 1.0557 13.0574
stenose0-49% 4.733e-01 2.113e+00 0.0000 Inf
stenose50-70% 1.635e+00 6.117e-01 0.0000 Inf
stenose70-90% 9.706e+07 1.030e-08 0.0000 Inf
stenose90-99% 7.220e+07 1.385e-08 0.0000 Inf
stenose100% (Occlusion) NA NA NA NA
stenoseNA NA NA NA NA
stenose50-99% NA NA NA NA
stenose70-99% NA NA NA NA
stenose99 NA NA NA NA
Concordance= 0.815 (se = 0.039 )
Likelihood ratio test= 33.08 on 17 df, p=0.01
Wald test = 12.37 on 17 df, p=0.8
Score (logrank) test = 27.71 on 17 df, p=0.05
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_rank ' and its association to ' epcvdeath.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epcvdeath.3years
Protein...................: MCP1_rank
Effect size...............: -0.241771
Standard error............: 0.441385
Odds ratio (effect size)..: 0.785
Lower 95% CI..............: 0.331
Upper 95% CI..............: 1.865
T-value...................: -0.547755
P-value...................: 0.5838601
Sample size in model......: 493
Number of events..........: 23
cat("- Edit the column names...\n")
- Edit the column names...
colnames(COX.results) = c("Dataset", "Outcome", "CpG",
"Beta", "s.e.m.",
"HR", "low95CI", "up95CI",
"Z-value", "P-value", "SampleSize", "N_events")
cat("- Correct the variable types...\n")
- Correct the variable types...
COX.results$Beta <- as.numeric(COX.results$Beta)
COX.results$s.e.m. <- as.numeric(COX.results$s.e.m.)
COX.results$HR <- as.numeric(COX.results$HR)
COX.results$low95CI <- as.numeric(COX.results$low95CI)
COX.results$up95CI <- as.numeric(COX.results$up95CI)
COX.results$`Z-value` <- as.numeric(COX.results$`Z-value`)
COX.results$`P-value` <- as.numeric(COX.results$`P-value`)
COX.results$SampleSize <- as.numeric(COX.results$SampleSize)
COX.results$N_events <- as.numeric(COX.results$N_events)
AEDB.CEA.COX.results <- COX.results
# Save the data
cat("- Writing results to Excel-file...\n")
- Writing results to Excel-file...
head.style <- createStyle(textDecoration = "BOLD")
write.xlsx(AEDB.CEA.COX.results,
file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Cox.2G.MODEL2.xlsx"),
creator = "Sander W. van der Laan",
sheetName = "Results", headerStyle = head.style,
row.names = FALSE, col.names = TRUE, overwrite = TRUE)
# Removing intermediates
cat("- Removing intermediate files...\n")
- Removing intermediate files...
rm(TEMP.DF, protein, fit, cox, coxplot, COX.results, COX.results.TEMP, head.style, AEDB.CEA.COX.results)
rm(head.style)
object 'head.style' not found
# Set up a dataframe to receive results
# 30-day follow-up, MODEL 1: protein stratum (2 groups) + Age + Gender + ORdate_year.
# One row is appended per endpoint/protein combination; the 12 columns are
# named after the loop has finished.
COX.results <- data.frame(matrix(NA, ncol = 12, nrow = 0))

# Looping over each protein/endpoint/time combination
# 'times30' and 'endpoints30' are indexed in parallel (same position = same
# endpoint). seq_along() is used instead of 1:length() so that an empty vector
# skips the loop rather than iterating over c(1, 0).
for (i in seq_along(times30)) {
  eptime <- times30[i]
  ep <- endpoints30[i]
  cat(paste0("* Analyzing the effect of plaque proteins on [",ep,"].\n"))

  cat(" - creating temporary SE for this work.\n")
  # Fresh copy per endpoint: the protein columns are overwritten by their
  # 2-group strata further down, so AEDB.CEA itself must stay untouched.
  TEMP.DF <- as.data.frame(AEDB.CEA)

  cat(" - making a 'Surv' object and adding this to temporary dataframe.\n")
  TEMP.DF$event <- as.integer(TEMP.DF[,ep])
  TEMP.DF$y <- Surv(time = TEMP.DF[,eptime], event = TEMP.DF$event)

  cat(" - making strata of each of the plaque proteins and start survival analysis.\n")
  for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
    cat(paste0(" > processing [",TRAITS.PROTEIN.RANK[protein],"]; ",protein," out of ",length(TRAITS.PROTEIN.RANK)," proteins.\n"))

    # splitting into two groups
    TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]] <- cut2(TEMP.DF[,TRAITS.PROTEIN.RANK[protein]], g = 2)
    cat(paste0(" > cross tabulation of ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    show(table(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]))

    cat(paste0("\n > fitting the model for ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    fit <- survfit(as.formula(paste0("y ~ ", TRAITS.PROTEIN.RANK[protein])), data = TEMP.DF)

    cat(paste0("\n > make a Kaplan-Meier-shizzle...\n"))
    # make Kaplan-Meier curve and save it
    show(ggsurvplot(fit, data = TEMP.DF,
                    palette = c("#DB003F", "#1290D9"),
                    # palete = c("F59D10", "#DB003F", "#49A01D", "#1290D9"),
                    linetype = c(1,2),
                    ylim = c(0.75, 1),
                    # linetype = c(1,2,3,4),
                    # conf.int = FALSE, conf.int.fill = "#595A5C", conf.int.alpha = 0.1,
                    pval = FALSE, pval.method = FALSE, pval.size = 4,
                    risk.table = TRUE, risk.table.y.text = FALSE, tables.y.text.col = TRUE, fontsize = 4,
                    censor = FALSE,
                    legend = "right",
                    legend.title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
                    legend.labs = c("low", "high"),
                    title = paste0("Risk of ",ep,""), xlab = "Time [days]", font.main = c(16, "bold", "black")))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.survival.",ep,".2G.",
                               TRAITS.PROTEIN.RANK[protein],".30days.pdf"), width = 12, height = 10, onefile = FALSE)

    cat(paste0("\n > perform the Cox-regression fashizzle and plot it...\n"))
    ### Do Cox-regression and plot it
    ### MODEL 1 (Simple model)
    # 'cox' estimates the effect of the protein stratum; 'coxplot' stratifies on
    # it instead, so that survfit() yields one adjusted curve per stratum.
    cox <- coxph(Surv(TEMP.DF[,eptime], event) ~ TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]+Age+Gender + ORdate_year, data = TEMP.DF)
    coxplot <- coxph(Surv(TEMP.DF[,eptime], event) ~ strata(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]])+Age+Gender + ORdate_year, data = TEMP.DF)
    # Curve colours now match the legend (and the Kaplan-Meier palette): the
    # previous 3-colour vector drew the two strata in grey and red while the
    # legend announced red and blue.
    # NOTE(review): the x-axis is limited to 0-3 with ticks per 1 while the
    # label says days and the file name says 30days - confirm the unit of
    # 'eptime' and adjust xlim/axis if needed.
    plot(survfit(coxplot), main = paste0("Cox proportional hazard of [",ep,"] per [",eptime,"]."),
         ylim = c(0.75, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
         lty = c(1,2), lwd = 2,
         ylab = "Survival probability", xlab = "FU time [days]",
         mark.time = FALSE, axes = FALSE, bty = "n")
    legend("topright",
           c("low", "high"),
           title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
           col = c("#DB003F", "#1290D9"),
           lty = c(1,2), lwd = 2,
           bty = "n")
    axis(side = 1, at = seq(0, 3, by = 1))
    axis(side = 2, at = seq(0, 1, by = 0.2))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.Cox.",ep,".2G.",
                               # Today,".AEDB.CEA.Cox.",ep,".4G.",
                               TRAITS.PROTEIN.RANK[protein],".MODEL1.30days.pdf"), height = 12, width = 10, onefile = TRUE)

    show(summary(cox))

    cat(paste0("\n > writing the Cox-regression fashizzle to Excel...\n"))
    # COX.STAT() is defined elsewhere in this file; it is expected to return
    # the 12 values that are named below - TODO confirm against its definition.
    COX.results.TEMP <- data.frame(matrix(NA, ncol = 12, nrow = 0))
    COX.results.TEMP[1,] <- COX.STAT(cox, "AEDB.CEA", ep, TRAITS.PROTEIN.RANK[protein])
    COX.results <- rbind(COX.results, COX.results.TEMP)
  }
}

cat("- Edit the column names...\n")
# NOTE(review): the third column receives the protein/trait name passed to
# COX.STAT(); the "CpG" label is kept so the Excel layout does not change.
colnames(COX.results) <- c("Dataset", "Outcome", "CpG",
                           "Beta", "s.e.m.",
                           "HR", "low95CI", "up95CI",
                           "Z-value", "P-value", "SampleSize", "N_events")

cat("- Correct the variable types...\n")
# Every statistic column is converted to numeric; the three identifier
# columns (Dataset, Outcome, CpG) are left as they are.
COX.numeric.cols <- c("Beta", "s.e.m.",
                      "HR", "low95CI", "up95CI",
                      "Z-value", "P-value", "SampleSize", "N_events")
COX.results[COX.numeric.cols] <- lapply(COX.results[COX.numeric.cols], as.numeric)

AEDB.CEA.COX.results <- COX.results

# Save the data
library(openxlsx)
cat("- Writing results to Excel-file...\n")
head.style <- createStyle(textDecoration = "BOLD")
write.xlsx(AEDB.CEA.COX.results,
           file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Cox.2G.MODEL1.30days.xlsx"),
           creator = "Sander W. van der Laan",
           sheetName = "Results", headerStyle = head.style,
           row.names = FALSE, col.names = TRUE, overwrite = TRUE)

# Removing intermediates
# The clean-up itself is currently disabled, so all objects created above
# remain in the workspace after this chunk.
cat("- Removing intermediate files...\n")
#rm(TEMP.DF, protein, fit, cox, coxplot, COX.results, COX.results.TEMP, head.style, AEDB.CEA.COX.results)
#rm(head.style)# Set up a dataframe to receive results
# Kaplan-Meier curves and Cox regressions for the endpoints in `endpoints30`
# (follow-up time columns in `times30`), MODEL 2.
# For every endpoint and every variable in TRAITS.PROTEIN.RANK the variable is
# split into two groups, a Kaplan-Meier plot and an adjusted survival plot are
# copied to PDF in COX_loc, and one row of Cox statistics is appended to
# COX.results. The collected table is then written to an Excel file in OUT_loc.
# Empty 12-column data frame that receives one row per endpoint/protein pair.
COX.results <- data.frame(matrix(NA, ncol = 12, nrow = 0))
# Looping over each protein/endpoint/time combination
# NOTE(review): 1:length(x) yields c(1, 0) when x is empty; seq_along(x) would be safer.
for (i in 1:length(times30)){
# times30[i] and endpoints30[i] are used as a pair: time column and event column.
eptime = times30[i]
ep = endpoints30[i]
cat(paste0("* Analyzing the effect of plaque proteins on [",ep,"].\n"))
cat(" - creating temporary SE for this work.\n")
# Fresh copy per endpoint, so the dichotomised columns created below do not
# carry over to the next endpoint.
TEMP.DF = as.data.frame(AEDB.CEA)
cat(" - making a 'Surv' object and adding this to temporary dataframe.\n")
TEMP.DF$event <- as.integer(TEMP.DF[,ep])
#as.integer(TEMP.DF[,ep] == "Excluded")
TEMP.DF$y <- Surv(time = TEMP.DF[,eptime], event = TEMP.DF$event)
cat(" - making strata of each of the plaque proteins and start survival analysis.\n")
for (protein in 1:length(TRAITS.PROTEIN.RANK)){
cat(paste0(" > processing [",TRAITS.PROTEIN.RANK[protein],"]; ",protein," out of ",length(TRAITS.PROTEIN.RANK)," proteins.\n"))
# splitting into two groups
# The column is overwritten in place with the grouping returned by cut2().
TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]] <- cut2(TEMP.DF[,TRAITS.PROTEIN.RANK[protein]], g = 2)
cat(paste0(" > cross tabulation of ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
show(table(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]))
cat(paste0("\n > fitting the model for ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
# Unadjusted survival curves per group, using the Surv object stored in column y.
fit <- survfit(as.formula(paste0("y ~ ", TRAITS.PROTEIN.RANK[protein])), data = TEMP.DF)
cat(paste0("\n > make a Kaplan-Meier-shizzle...\n"))
# make Kaplan-Meier curve and save it
show(ggsurvplot(fit, data = TEMP.DF,
palette = c("#DB003F", "#1290D9"),
# palete = c("F59D10", "#DB003F", "#49A01D", "#1290D9"),
linetype = c(1,2),
ylim = c(0.75, 1),
# linetype = c(1,2,3,4),
# conf.int = FALSE, conf.int.fill = "#595A5C", conf.int.alpha = 0.1,
pval = FALSE, pval.method = FALSE, pval.size = 4,
risk.table = TRUE, risk.table.y.text = FALSE, tables.y.text.col = TRUE, fontsize = 4,
censor = FALSE,
legend = "right",
legend.title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
legend.labs = c("low", "high"),
title = paste0("Risk of ",ep,""), xlab = "Time [days]", font.main = c(16, "bold", "black")))
# Copy whatever is on the active graphics device (the plot shown above) to PDF.
dev.copy2pdf(file = paste0(COX_loc,"/",
Today,".AEDB.CEA.survival.",ep,".2G.",
TRAITS.PROTEIN.RANK[protein],".30days.pdf"), width = 12, height = 10, onefile = FALSE)
cat(paste0("\n > perform the Cox-regression fashizzle and plot it...\n"))
### Do Cox-regression and plot it
### MODEL 2: the formulas below adjust for Age, Gender, ORdate_year, Hypertension.composite,
### DiabetesStatus, SmokerStatus, Med.Statin.LLD, Med.all.antiplatelet, GFR_MDRD, BMI,
### MedHx_CVD and stenose.
### NOTE(review): an earlier description of this model also listed LDL-C levels; no LDL-C
### term is present in the formula, and ORdate_year was not mentioned - confirm which is intended.
# `cox` estimates the effect of the two-group variable (statistics go into the results table);
# `coxplot` uses the same variable as strata() so that one adjusted curve per group can be drawn.
cox = coxph(Surv(TEMP.DF[,eptime], event) ~ TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]+Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus + SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI + MedHx_CVD + stenose, data = TEMP.DF)
coxplot = coxph(Surv(TEMP.DF[,eptime], event) ~ strata(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]])+Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus + SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI + MedHx_CVD + stenose, data = TEMP.DF)
# NOTE(review): xlim = c(0, 3) limits the x-axis to 0-3 while the axis label reads days and
# the output file is named "30days" - confirm the time unit of the `times30` columns.
plot(survfit(coxplot), main = paste0("Cox proportional hazard of [",ep,"] per [",eptime,"]."),
ylim = c(0.75, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
# ylim = c(0, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
lty = c(1,2), lwd = 2,
ylab = "Suvival probability", xlab = "FU time [days]",
mark.time = FALSE, axes = FALSE, bty = "n")
# Legend colours and line types repeat those passed to plot() above.
legend("topright",
c("low", "high"),
title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
col = c("#DB003F", "#1290D9"),
lty = c(1,2), lwd = 2,
bty = "n")
# Axes were suppressed in plot() (axes = FALSE) and are drawn manually here.
axis(side = 1, at = seq(0, 3, by = 1))
axis(side = 2, at = seq(0, 1, by = 0.2))
dev.copy2pdf(file = paste0(COX_loc,"/",
Today,".AEDB.CEA.Cox.",ep,".2G.",
# Today,".AEDB.CEA.Cox.",ep,".4G.",
TRAITS.PROTEIN.RANK[protein],".MODEL2.30days.pdf"), height = 12, width = 10, onefile = TRUE)
show(summary(cox))
cat(paste0("\n > writing the Cox-regression fashizzle to Excel...\n"))
# COX.STAT() is defined outside this block; its result is stored as a single 12-column row.
COX.results.TEMP <- data.frame(matrix(NA, ncol = 12, nrow = 0))
COX.results.TEMP[1,] = COX.STAT(cox, "AEDB.CEA", ep, TRAITS.PROTEIN.RANK[protein])
COX.results = rbind(COX.results, COX.results.TEMP)
}
}
cat("- Edit the column names...\n")
# NOTE(review): the third column is labelled "CpG"; presumably it holds the protein name
# passed to COX.STAT() - confirm before renaming, the exported file uses this header.
colnames(COX.results) = c("Dataset", "Outcome", "CpG",
"Beta", "s.e.m.",
"HR", "low95CI", "up95CI",
"Z-value", "P-value", "SampleSize", "N_events")
cat("- Correct the variable types...\n")
# Convert every statistics column to numeric; the three identifier columns are left as they are.
COX.results$Beta <- as.numeric(COX.results$Beta)
COX.results$s.e.m. <- as.numeric(COX.results$s.e.m.)
COX.results$HR <- as.numeric(COX.results$HR)
COX.results$low95CI <- as.numeric(COX.results$low95CI)
COX.results$up95CI <- as.numeric(COX.results$up95CI)
COX.results$`Z-value` <- as.numeric(COX.results$`Z-value`)
COX.results$`P-value` <- as.numeric(COX.results$`P-value`)
COX.results$SampleSize <- as.numeric(COX.results$SampleSize)
COX.results$N_events <- as.numeric(COX.results$N_events)
AEDB.CEA.COX.results <- COX.results
# Save the data
cat("- Writing results to Excel-file...\n")
# Bold header style for the first row of the worksheet.
head.style <- createStyle(textDecoration = "BOLD")
write.xlsx(AEDB.CEA.COX.results,
file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Cox.2G.MODEL2.30days.xlsx"),
creator = "Sander W. van der Laan",
sheetName = "Results", headerStyle = head.style,
row.names = FALSE, col.names = TRUE, overwrite = TRUE)
# Removing intermediates
cat("- Removing intermediate files...\n")
rm(TEMP.DF, protein, fit, cox, coxplot, COX.results, COX.results.TEMP, head.style, AEDB.CEA.COX.results)# Set up a dataframe to receive results
# Kaplan-Meier curves and Cox regressions for the endpoints in `endpoints90`
# (follow-up time columns in `times90`), MODEL 1 (Age, Gender, ORdate_year).
# For every endpoint and every variable in TRAITS.PROTEIN.RANK the variable is
# split into two groups, a Kaplan-Meier plot and an adjusted survival plot are
# copied to PDF in COX_loc, and one row of Cox statistics is appended to
# COX.results. The collected table is then written to an Excel file in OUT_loc.
COX.results <- data.frame(matrix(NA, ncol = 12, nrow = 0))
# Looping over each protein/endpoint/time combination
# seq_along() rather than 1:length(): the latter iterates over c(1, 0) when the
# vector is empty.
for (i in seq_along(times90)) {
  # times90[i] and endpoints90[i] are used as a pair: time column and event column.
  eptime <- times90[i]
  ep <- endpoints90[i]
  cat(paste0("* Analyzing the effect of plaque proteins on [",ep,"].\n"))
  cat(" - creating temporary SE for this work.\n")
  # Fresh copy per endpoint, so the dichotomised columns created below do not
  # carry over to the next endpoint.
  TEMP.DF <- as.data.frame(AEDB.CEA)
  cat(" - making a 'Surv' object and adding this to temporary dataframe.\n")
  TEMP.DF$event <- as.integer(TEMP.DF[,ep])
  TEMP.DF$y <- Surv(time = TEMP.DF[,eptime], event = TEMP.DF$event)
  cat(" - making strata of each of the plaque proteins and start survival analysis.\n")
  for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
    cat(paste0(" > processing [",TRAITS.PROTEIN.RANK[protein],"]; ",protein," out of ",length(TRAITS.PROTEIN.RANK)," proteins.\n"))
    # splitting into two groups; the column is overwritten in place
    TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]] <- cut2(TEMP.DF[,TRAITS.PROTEIN.RANK[protein]], g = 2)
    cat(paste0(" > cross tabulation of ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    show(table(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]))
    cat(paste0("\n > fitting the model for ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    # Unadjusted survival curves per group, using the Surv object stored in column y.
    fit <- survfit(as.formula(paste0("y ~ ", TRAITS.PROTEIN.RANK[protein])), data = TEMP.DF)
    cat(paste0("\n > make a Kaplan-Meier-shizzle...\n"))
    # make Kaplan-Meier curve and save it
    show(ggsurvplot(fit, data = TEMP.DF,
                    palette = c("#DB003F", "#1290D9"),
                    # palete = c("F59D10", "#DB003F", "#49A01D", "#1290D9"),
                    linetype = c(1,2),
                    ylim = c(0.75, 1),
                    # linetype = c(1,2,3,4),
                    # conf.int = FALSE, conf.int.fill = "#595A5C", conf.int.alpha = 0.1,
                    pval = FALSE, pval.method = FALSE, pval.size = 4,
                    risk.table = TRUE, risk.table.y.text = FALSE, tables.y.text.col = TRUE, fontsize = 4,
                    censor = FALSE,
                    legend = "right",
                    legend.title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
                    legend.labs = c("low", "high"),
                    title = paste0("Risk of ",ep,""), xlab = "Time [days]", font.main = c(16, "bold", "black")))
    # Copy whatever is on the active graphics device (the plot shown above) to PDF.
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.survival.",ep,".2G.",
                               TRAITS.PROTEIN.RANK[protein],".90days.pdf"), width = 12, height = 10, onefile = FALSE)
    cat(paste0("\n > perform the Cox-regression fashizzle and plot it...\n"))
    ### Do Cox-regression and plot it
    ### MODEL 1 (Simple model)
    # `cox` estimates the effect of the two-group variable (statistics go into
    # the results table); `coxplot` uses the same variable as strata() so that
    # one adjusted curve per group can be drawn.
    cox <- coxph(Surv(TEMP.DF[,eptime], event) ~ TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]+Age+Gender + ORdate_year, data = TEMP.DF)
    coxplot <- coxph(Surv(TEMP.DF[,eptime], event) ~ strata(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]])+Age+Gender + ORdate_year, data = TEMP.DF)
    # The curve colours must be the same two colours, in the same order, as the
    # legend below; a leading third colour would shift the curves to grey/red
    # while the legend keeps showing red/blue.
    # NOTE(review): xlim = c(0, 3) limits the x-axis to 0-3 while the axis label
    # reads days and the output file is named "90days" - confirm the time unit.
    plot(survfit(coxplot), main = paste0("Cox proportional hazard of [",ep,"] per [",eptime,"]."),
         ylim = c(0.75, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
         # ylim = c(0, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
         lty = c(1,2), lwd = 2,
         ylab = "Suvival probability", xlab = "FU time [days]",
         mark.time = FALSE, axes = FALSE, bty = "n")
    legend("topright",
           c("low", "high"),
           title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
           col = c("#DB003F", "#1290D9"),
           lty = c(1,2), lwd = 2,
           bty = "n")
    # Axes were suppressed in plot() (axes = FALSE) and are drawn manually here.
    axis(side = 1, at = seq(0, 3, by = 1))
    axis(side = 2, at = seq(0, 1, by = 0.2))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.Cox.",ep,".2G.",
                               # Today,".AEDB.CEA.Cox.",ep,".4G.",
                               TRAITS.PROTEIN.RANK[protein],".MODEL1.90days.pdf"), height = 12, width = 10, onefile = TRUE)
    show(summary(cox))
    cat(paste0("\n > writing the Cox-regression fashizzle to Excel...\n"))
    # COX.STAT() is defined outside this block; its result is stored as a
    # single 12-column row and appended to the running table.
    COX.results.TEMP <- data.frame(matrix(NA, ncol = 12, nrow = 0))
    COX.results.TEMP[1,] <- COX.STAT(cox, "AEDB.CEA", ep, TRAITS.PROTEIN.RANK[protein])
    COX.results <- rbind(COX.results, COX.results.TEMP)
  }
}
cat("- Edit the column names...\n")
# NOTE(review): the third column is labelled "CpG"; presumably it holds the
# protein name passed to COX.STAT() - confirm before renaming, the exported
# file uses this header.
colnames(COX.results) <- c("Dataset", "Outcome", "CpG",
                           "Beta", "s.e.m.",
                           "HR", "low95CI", "up95CI",
                           "Z-value", "P-value", "SampleSize", "N_events")
cat("- Correct the variable types...\n")
# Convert every statistics column to numeric; the three identifier columns are
# left as they are.
COX.results$Beta <- as.numeric(COX.results$Beta)
COX.results$s.e.m. <- as.numeric(COX.results$s.e.m.)
COX.results$HR <- as.numeric(COX.results$HR)
COX.results$low95CI <- as.numeric(COX.results$low95CI)
COX.results$up95CI <- as.numeric(COX.results$up95CI)
COX.results$`Z-value` <- as.numeric(COX.results$`Z-value`)
COX.results$`P-value` <- as.numeric(COX.results$`P-value`)
COX.results$SampleSize <- as.numeric(COX.results$SampleSize)
COX.results$N_events <- as.numeric(COX.results$N_events)
AEDB.CEA.COX.results <- COX.results
# Save the data
library(openxlsx)
cat("- Writing results to Excel-file...\n")
# Bold header style for the first row of the worksheet.
head.style <- createStyle(textDecoration = "BOLD")
write.xlsx(AEDB.CEA.COX.results,
           file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Cox.2G.MODEL1.90days.xlsx"),
           creator = "Sander W. van der Laan",
           sheetName = "Results", headerStyle = head.style,
           row.names = FALSE, col.names = TRUE, overwrite = TRUE)
# Removing intermediates
cat("- Removing intermediate files...\n")
#rm(TEMP.DF, protein, fit, cox, coxplot, COX.results, COX.results.TEMP, head.style, AEDB.CEA.COX.results)
#rm(head.style)# Set up a dataframe to receive results
# Kaplan-Meier curves and Cox regressions for the endpoints in `endpoints90`
# (follow-up time columns in `times90`), MODEL 2.
# For every endpoint and every variable in TRAITS.PROTEIN.RANK the variable is
# split into two groups, a Kaplan-Meier plot and an adjusted survival plot are
# copied to PDF in COX_loc, and one row of Cox statistics is appended to
# COX.results. The collected table is then written to an Excel file in OUT_loc.
# Empty 12-column data frame that receives one row per endpoint/protein pair.
COX.results <- data.frame(matrix(NA, ncol = 12, nrow = 0))
# Looping over each protein/endpoint/time combination
# NOTE(review): 1:length(x) yields c(1, 0) when x is empty; seq_along(x) would be safer.
for (i in 1:length(times90)){
# times90[i] and endpoints90[i] are used as a pair: time column and event column.
eptime = times90[i]
ep = endpoints90[i]
cat(paste0("* Analyzing the effect of plaque proteins on [",ep,"].\n"))
cat(" - creating temporary SE for this work.\n")
# Fresh copy per endpoint, so the dichotomised columns created below do not
# carry over to the next endpoint.
TEMP.DF = as.data.frame(AEDB.CEA)
cat(" - making a 'Surv' object and adding this to temporary dataframe.\n")
TEMP.DF$event <- as.integer(TEMP.DF[,ep])
#as.integer(TEMP.DF[,ep] == "Excluded")
TEMP.DF$y <- Surv(time = TEMP.DF[,eptime], event = TEMP.DF$event)
cat(" - making strata of each of the plaque proteins and start survival analysis.\n")
for (protein in 1:length(TRAITS.PROTEIN.RANK)){
cat(paste0(" > processing [",TRAITS.PROTEIN.RANK[protein],"]; ",protein," out of ",length(TRAITS.PROTEIN.RANK)," proteins.\n"))
# splitting into two groups
# The column is overwritten in place with the grouping returned by cut2().
TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]] <- cut2(TEMP.DF[,TRAITS.PROTEIN.RANK[protein]], g = 2)
cat(paste0(" > cross tabulation of ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
show(table(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]))
cat(paste0("\n > fitting the model for ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
# Unadjusted survival curves per group, using the Surv object stored in column y.
fit <- survfit(as.formula(paste0("y ~ ", TRAITS.PROTEIN.RANK[protein])), data = TEMP.DF)
cat(paste0("\n > make a Kaplan-Meier-shizzle...\n"))
# make Kaplan-Meier curve and save it
show(ggsurvplot(fit, data = TEMP.DF,
palette = c("#DB003F", "#1290D9"),
# palete = c("F59D10", "#DB003F", "#49A01D", "#1290D9"),
linetype = c(1,2),
ylim = c(0.75, 1),
# linetype = c(1,2,3,4),
# conf.int = FALSE, conf.int.fill = "#595A5C", conf.int.alpha = 0.1,
pval = FALSE, pval.method = FALSE, pval.size = 4,
risk.table = TRUE, risk.table.y.text = FALSE, tables.y.text.col = TRUE, fontsize = 4,
censor = FALSE,
legend = "right",
legend.title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
legend.labs = c("low", "high"),
title = paste0("Risk of ",ep,""), xlab = "Time [days]", font.main = c(16, "bold", "black")))
# Copy whatever is on the active graphics device (the plot shown above) to PDF.
dev.copy2pdf(file = paste0(COX_loc,"/",
Today,".AEDB.CEA.survival.",ep,".2G.",
TRAITS.PROTEIN.RANK[protein],".90days.pdf"), width = 12, height = 10, onefile = FALSE)
cat(paste0("\n > perform the Cox-regression fashizzle and plot it...\n"))
### Do Cox-regression and plot it
### MODEL 2: the formulas below adjust for Age, Gender, ORdate_year, Hypertension.composite,
### DiabetesStatus, SmokerStatus, Med.Statin.LLD, Med.all.antiplatelet, GFR_MDRD, BMI,
### MedHx_CVD and stenose.
### NOTE(review): an earlier description of this model also listed LDL-C levels; no LDL-C
### term is present in the formula, and ORdate_year was not mentioned - confirm which is intended.
# `cox` estimates the effect of the two-group variable (statistics go into the results table);
# `coxplot` uses the same variable as strata() so that one adjusted curve per group can be drawn.
cox = coxph(Surv(TEMP.DF[,eptime], event) ~ TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]+Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus + SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI + MedHx_CVD + stenose, data = TEMP.DF)
coxplot = coxph(Surv(TEMP.DF[,eptime], event) ~ strata(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]])+Age + Gender + ORdate_year + Hypertension.composite + DiabetesStatus + SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI + MedHx_CVD + stenose, data = TEMP.DF)
# NOTE(review): xlim = c(0, 3) limits the x-axis to 0-3 while the axis label reads days and
# the output file is named "90days" - confirm the time unit of the `times90` columns.
plot(survfit(coxplot), main = paste0("Cox proportional hazard of [",ep,"] per [",eptime,"]."),
ylim = c(0.75, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
# ylim = c(0, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
lty = c(1,2), lwd = 2,
ylab = "Suvival probability", xlab = "FU time [days]",
mark.time = FALSE, axes = FALSE, bty = "n")
# Legend colours and line types repeat those passed to plot() above.
legend("topright",
c("low", "high"),
title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
col = c("#DB003F", "#1290D9"),
lty = c(1,2), lwd = 2,
bty = "n")
# Axes were suppressed in plot() (axes = FALSE) and are drawn manually here.
axis(side = 1, at = seq(0, 3, by = 1))
axis(side = 2, at = seq(0, 1, by = 0.2))
dev.copy2pdf(file = paste0(COX_loc,"/",
Today,".AEDB.CEA.Cox.",ep,".2G.",
# Today,".AEDB.CEA.Cox.",ep,".4G.",
TRAITS.PROTEIN.RANK[protein],".MODEL2.90days.pdf"), height = 12, width = 10, onefile = TRUE)
show(summary(cox))
cat(paste0("\n > writing the Cox-regression fashizzle to Excel...\n"))
# COX.STAT() is defined outside this block; its result is stored as a single 12-column row.
COX.results.TEMP <- data.frame(matrix(NA, ncol = 12, nrow = 0))
COX.results.TEMP[1,] = COX.STAT(cox, "AEDB.CEA", ep, TRAITS.PROTEIN.RANK[protein])
COX.results = rbind(COX.results, COX.results.TEMP)
}
}
cat("- Edit the column names...\n")
# NOTE(review): the third column is labelled "CpG"; presumably it holds the protein name
# passed to COX.STAT() - confirm before renaming, the exported file uses this header.
colnames(COX.results) = c("Dataset", "Outcome", "CpG",
"Beta", "s.e.m.",
"HR", "low95CI", "up95CI",
"Z-value", "P-value", "SampleSize", "N_events")
cat("- Correct the variable types...\n")
# Convert every statistics column to numeric; the three identifier columns are left as they are.
COX.results$Beta <- as.numeric(COX.results$Beta)
COX.results$s.e.m. <- as.numeric(COX.results$s.e.m.)
COX.results$HR <- as.numeric(COX.results$HR)
COX.results$low95CI <- as.numeric(COX.results$low95CI)
COX.results$up95CI <- as.numeric(COX.results$up95CI)
COX.results$`Z-value` <- as.numeric(COX.results$`Z-value`)
COX.results$`P-value` <- as.numeric(COX.results$`P-value`)
COX.results$SampleSize <- as.numeric(COX.results$SampleSize)
COX.results$N_events <- as.numeric(COX.results$N_events)
AEDB.CEA.COX.results <- COX.results
# Save the data
cat("- Writing results to Excel-file...\n")
# Bold header style for the first row of the worksheet.
head.style <- createStyle(textDecoration = "BOLD")
write.xlsx(AEDB.CEA.COX.results,
file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Cox.2G.MODEL2.90days.xlsx"),
creator = "Sander W. van der Laan",
sheetName = "Results", headerStyle = head.style,
row.names = FALSE, col.names = TRUE, overwrite = TRUE)
# Removing intermediates
cat("- Removing intermediate files...\n")
rm(TEMP.DF, protein, fit, cox, coxplot, COX.results, COX.results.TEMP, head.style, AEDB.CEA.COX.results)We correlated plaque levels of the biomarkers.
# Installation of ggcorrplot()
# --------------------------------
# Install devtools only when it is not available. requireNamespace() checks
# availability without attaching the package; the installer that follows is
# called through the `devtools::` prefix, so attaching is not needed here.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("kassambara/ggcorrplot")Skipping install of 'ggcorrplot' from a github remote, the SHA1 (ad71a164) has not changed since last install.
Use `force = TRUE` to force installation
library(ggcorrplot)
# Creating matrix - inverse-rank transformation
# --------------------------------
# AEDB.CEA.temp <- subset(AEDB.CEA,
# select = c("IL6_rank", "MCP1_rank", "IL6_pg_ug_2015_rank", "MCP1_pg_ug_2015_rank", "IL6R_pg_ug_2015_rank",
# TRAITS.BIN, TRAITS.CON.RANK)
# )
# AEDB.CEA.temp <- subset(AEDB.CEA,
# select = c("MCP1_rank", "MCP1_pg_ug_2015_rank",
# TRAITS.BIN, TRAITS.CON.RANK)
# )
# Collect the variables that enter the correlation analysis: the MCP1 plaque
# level, the variables named in TRAITS.BIN and TRAITS.CON.RANK, and the
# symptom and endpoint variables.
AEDB.CEA.temp <- subset(
  AEDB.CEA,
  select = c(
    "MCP1_pg_ml_2015_rank",
    TRAITS.BIN,
    TRAITS.CON.RANK,
    "Symptoms.5G", "AsymptSympt", "EP_major", "EP_composite"
  )
)
# Recode the listed columns to numeric so that the whole table can later be
# turned into a numeric matrix.
for (column.name in c("CalcificationPlaque", "CollagenPlaque", "Fat10Perc",
                      "MAC_binned", "SMC_binned", "IPH",
                      "Symptoms.5G", "AsymptSympt",
                      "EP_major", "EP_composite")) {
  AEDB.CEA.temp[[column.name]] <- as.numeric(AEDB.CEA.temp[[column.name]])
}
# Drop the loop variable again so the workspace holds the same objects as before.
rm(column.name)
str(AEDB.CEA.temp)'data.frame': 2423 obs. of 15 variables:
$ MCP1_pg_ml_2015_rank: num 0.45 1.572 0.67 -1.038 0.326 ...
$ CalcificationPlaque : num 1 1 2 1 2 1 2 1 2 2 ...
$ CollagenPlaque : num 2 2 2 2 2 1 2 2 2 1 ...
$ Fat10Perc : num 2 2 1 2 2 2 1 2 2 2 ...
$ IPH : num 2 1 2 2 2 2 1 2 2 2 ...
$ MAC_binned : num 1 1 1 1 NA 1 1 2 2 1 ...
$ SMC_binned : num 1 2 2 2 1 2 2 2 2 1 ...
$ Macrophages_rank : num 1.121 0.396 0.29 0.32 -2.316 ...
$ SMC_rank : num 1.132 1.27 1.307 0.783 -0.828 ...
$ MAC_SMC_ratio_rank : num 0.236 -0.344 -0.42 -0.174 -2.336 ...
$ VesselDensity_rank : num -0.978 1.1 -0.858 -1.068 -0.231 ...
$ Symptoms.5G : num 5 6 5 2 6 6 2 6 6 5 ...
$ AsymptSympt : num 3 3 3 2 3 3 2 3 3 3 ...
$ EP_major : num 0 0 0 1 1 0 0 0 1 1 ...
$ EP_composite : num 2 2 2 3 3 2 2 2 3 3 ...
# Turn the prepared table into a matrix and discard the intermediate table.
AEDB.CEA.matrix.RANK <- as.matrix(AEDB.CEA.temp)
rm(AEDB.CEA.temp)
# Spearman correlations between all pairs of columns. With
# "pairwise.complete.obs" each coefficient is computed from all observations
# that are complete for that particular pair of variables.
corr_biomarkers.rank <- cor(
  AEDB.CEA.matrix.RANK,
  use = "pairwise.complete.obs",
  method = "spearman"
)
# Keep three decimals.
corr_biomarkers.rank <- round(corr_biomarkers.rank, 3)
# corr_biomarkers.rank
corr_biomarkers_p.rank <- ggcorrplot::cor_pmat(AEDB.CEA.matrix.RANK, use = "pairwise.complete.obs", method = "spearman")Cannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute 
exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with 
tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with ties
# Add correlation coefficients
# --------------------------------
# argument lab = TRUE
# Heatmap of the rounded Spearman correlation matrix: lower triangle including
# the diagonal, variables kept in their original order (no clustering), and no
# coefficients printed in the tiles.
ggcorrplot(
  corr_biomarkers.rank,
  # layout
  type = "lower",
  method = "square",
  show.diag = TRUE,
  hc.order = FALSE,
  # labelling
  title = "Cross biomarker correlations",
  lab = FALSE,
  digits = 3,
  # legend
  show.legend = TRUE,
  legend.title = bquote("Spearman's"~italic(rho)),
  # appearance
  ggtheme = ggplot2::theme_minimal,
  outline.color = "#FFFFFF",
  # p.mat = corr_biomarkers_p.rank, sig.level = 0.05,
  colors = c("#1290D9", "#FFFFFF", "#E55738")
)
# flattenCorrMatrix
# --------------------------------
# cormat : matrix of the correlation coefficients
# pmat : matrix of the correlation p-values
# Flatten a correlation matrix and its p-value matrix into a long table with
# one row per cell of the upper triangle (diagonal excluded).
#   cormat : matrix of correlation coefficients; its row names label both the
#            row and the column of each pair
#   pmat   : matrix of p-values, indexed with the same upper-triangle mask
# Returns a data.frame with the columns biomarker_row, biomarker_column,
# spearman_cor and pval.
flattenCorrMatrix <- function(cormat, pmat) {
  upper <- upper.tri(cormat)
  labels <- rownames(cormat)
  # Row and column position of every selected cell, in column-major order.
  row_idx <- row(cormat)[upper]
  col_idx <- col(cormat)[upper]
  data.frame(
    biomarker_row = labels[row_idx],
    biomarker_column = labels[col_idx],
    spearman_cor = cormat[upper],
    pval = pmat[upper]
  )
}
corr_biomarkers.rank.df <- as.data.table(flattenCorrMatrix(corr_biomarkers.rank, corr_biomarkers_p.rank))
DT::datatable(corr_biomarkers.rank.df)NA# chart of a correlation matrix
# --------------------------------
# Alternative solution https://www.r-graph-gallery.com/199-correlation-matrix-with-ggally.html
install.packages.auto("PerformanceAnalytics")
# Scatterplot matrix with smoothed scatterplots in the lower panels, the
# correlation coefficient plus significance stars in the upper panels and
# (optionally) histograms with a density line on the diagonal.
# NOTE(review): appears to be adapted from PerformanceAnalytics'
# chart.Correlation; checkData() is expected to come from that package.
#
#   R         : data passed to checkData(R, method = "matrix")
#   histogram : draw the histogram/density panels on the diagonal?
#   method    : correlation method handed to cor() and cor.test(); defaults to
#               the first choice, "pearson"
#   ...       : further arguments forwarded to pairs() (e.g. pch)
chart.Correlation.new <- function (R, histogram = TRUE, method = c("pearson", "kendall",
                                                                   "spearman"), ...)
{
  # Resolve the default to a single method name.
  method <- match.arg(method)
  x <- checkData(R, method = "matrix")
  cormeth <- method
  # Upper panel: prints the coefficient, scaled by its absolute size, and
  # significance stars based on the p-value of cor.test().
  panel.cor <- function(x, y, digits = 2, prefix = "", use = "pairwise.complete.obs",
                        method = cormeth, cex.cor, ...) {
    # par() returns the previous value of the settings it changes; keeping that
    # list is what allows on.exit() to restore the panel's user coordinates.
    old.par <- par(usr = c(0, 1, 0, 1))
    on.exit(par(old.par))
    r <- cor(x, y, use = use, method = method)
    txt <- format(c(r, 0.123456789), digits = digits)[1]
    txt <- paste0(prefix, txt)
    # Use the caller's text size when given, otherwise fit the text to the panel.
    cex <- if (missing(cex.cor)) 0.8/strwidth(txt) else cex.cor
    test <- cor.test(as.numeric(x), as.numeric(y), method = method)
    Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
                     cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***",
                                                                              "**", "*", ".", " "))
    text(0.5, 0.5, txt, cex = cex * (abs(r) + 0.3)/1.3)
    text(0.8, 0.8, Signif, cex = cex, col = 2)
  }
  # Diagonal panel: histogram overlaid with a kernel density line and a rug.
  hist.panel <- function(x, ...) {
    par(new = TRUE)
    hist(x, col = "#1290D9", probability = TRUE, axes = FALSE,
         # hist(x, col = "light gray", probability = TRUE, axes = FALSE,
         main = "", breaks = "FD")
    lines(density(x, na.rm = TRUE), col = "#E55738", lwd = 1)
    rug(x)
  }
  if (histogram) {
    pairs(x, gap = 0, lower.panel = panel.smooth, upper.panel = panel.cor,
          diag.panel = hist.panel, ...)
  } else {
    pairs(x, gap = 0, lower.panel = panel.smooth, upper.panel = panel.cor, ...)
  }
}
chart.Correlation.new(AEDB.CEA.matrix.RANK, method = "spearman", histogram = TRUE, pch = 3)# alternative chart of a correlation matrix
# --------------------------------
# Alternative solution https://www.r-graph-gallery.com/199-correlation-matrix-with-ggally.html
install.packages.auto("GGally")
# Quick display of two capabilities of GGally, to assess the distribution and correlation of variables
library(GGally)
# From the help page:
ggpairs(AEDB.CEA,
columns = c("MCP1_pg_ml_2015_rank", TRAITS.BIN, TRAITS.CON.RANK, "Symptoms.5G", "AsymptSympt", "EP_major", "EP_composite"),
columnLabels = c("MCP1",
"Calcification", "Collagen", "Fat 10%", "IPH", "Macrophages (binned)", "SMC (binned)", "Macrophages", "SMC", "Macrophage/SMC", "Vessel density",
"Symptoms", "Symptoms (grouped)", "MACE", "Composite"),
method = c("spearman"),
# ggplot2::aes(colour = Gender),
progress = FALSE)Version: v1.1.0
Last update: 2021-02-11
Written by: Sander W. van der Laan (s.w.vanderlaan-2[at]umcutrecht.nl).
Description: Script to analyse MCP1 from the Athero-Express Biobank Study.
Minimum requirements: R version 3.5.2 (2018-12-20) -- 'Eggshell Igloo', macOS Mojave (10.14.2).
**MoSCoW To-Do List**
The things we Must, Should, Could, and Would have given the time we have.
_M_
_S_
_C_
_W_
**Changes log**
* v1.1.0 Fixes needed for compiling the HTML.
* v1.0.19 Updating for different macOS devices. Addressing reviewer comments.
* v1.0.18 Changed 'asymptomatic vs. symptomatic' DotPlot to have dots instead of lines. Added boxplot for the same.
* v1.0.17 Added regular, and per gender boxplots for risk factors, _etc_. Changed coloring for consistency.
* v1.0.16 Create a pg/mL-only version. Switched to a new .RMD, but kept versioning. Removed the plasma-based analyses.
* v1.0.15 Add sex-stratified plots for MCP1 plaque levels by symptoms and plaque vulnerability index.
* v1.0.14 Add analysis on plasma based MCP1 levels measured through OLINK, n ± 700, limited to symptomatic patients only.
* v1.0.13 Splitting RMDs into plaque-focused, and one including plasma levels of MCP1.
* v1.0.12 Add boxplots of MCP1 levels stratified by confounder/variables.
* v1.0.11 Add analysis of pilot data comparing OLINK-platform based MCP1 levels in plasma and plaque.
* v1.0.10 Add analyses for all three MCP1, MCP1_pg_ml_2015, and MCP1_pg_ug_2015. Add comparison between MCP1, MCP1_pg_ml_2015, and MCP1_pg_ug_2015. Add (and fixed) ordinal regression. Double checked which measurement to use.
* v1.0.9 Added linear regression models for MCP1 vs. cytokines plaque levels. Double checked upload of MACE-plots. Added statistics from correlation (heatmap) to txt-file.
* v1.0.8 Fixed error in MCP1 plasma analysis. It turns out the MCP1 and MCP1_pg_ug_2015 variables are _both_ measured in plaque, in two separate experiments, exp. no. 1 and exp. no. 2, respectively.
* v1.0.7 Fixed the per Age-group MCP1 Box plots. Added correlations with other cytokines in plaques.
* v1.0.6 Only analyses and figures that end up in the final manuscript.
* v1.0.5 Update with 30- and 90-days survival.
* v1.0.4 Updated with Cox-regressions.
* v1.0.3 Included more models.
* v1.0.2 Bugs fixed.
* v1.0.1 Extended with linear and logistic regressions.
* v1.0.0 Initial version.
sessionInfo()R version 4.0.3 (2020-10-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur 10.16
Matrix products: default
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] tools stats graphics grDevices utils datasets methods base
other attached packages:
[1] GGally_2.1.0 PerformanceAnalytics_2.0.4 xts_0.12.1 zoo_1.8-8 ggcorrplot_0.1.3.999
[6] Hmisc_4.4-2 Formula_1.2-4 lattice_0.20-41 survminer_0.4.8 survival_3.2-7
[11] patchwork_1.1.1 ggsci_2.9 openxlsx_4.2.3 ggpubr_0.4.0 tableone_0.12.0
[16] labelled_2.7.0 sjPlot_2.8.7 sjlabelled_1.1.7 haven_2.3.1 devtools_2.3.2
[21] usethis_2.0.0 MASS_7.3-53 DT_0.17 knitr_1.31 forcats_0.5.1
[26] stringr_1.4.0 purrr_0.3.4 tibble_3.0.6 ggplot2_3.3.3 tidyverse_1.3.0
[31] data.table_1.13.6 naniar_0.6.0 tidyr_1.1.2 dplyr_1.0.4 optparse_1.6.6
[36] readr_1.4.0
loaded via a namespace (and not attached):
[1] readxl_1.3.1 backports_1.2.1 plyr_1.8.6 splines_4.0.3 crosstalk_1.1.1 TH.data_1.0-10
[7] digest_0.6.27 htmltools_0.5.1.1 checkmate_2.0.0 magrittr_2.0.1 memoise_2.0.0 cluster_2.1.0
[13] remotes_2.2.0 modelr_0.1.8 sandwich_3.0-0 prettyunits_1.1.1 jpeg_0.1-8.1 colorspace_2.0-0
[19] rvest_0.3.6 mitools_2.4 xfun_0.20 callr_3.5.1 crayon_1.4.1 jsonlite_1.7.2
[25] lme4_1.1-26 glue_1.4.2 gtable_0.3.0 emmeans_1.5.4 sjstats_0.18.1 sjmisc_2.8.6
[31] car_3.0-10 pkgbuild_1.2.0 abind_1.4-5 scales_1.1.1 mvtnorm_1.1-1 DBI_1.1.1
[37] rstatix_0.6.0 ggeffects_1.0.1 Rcpp_1.0.6 htmlTable_2.1.0 xtable_1.8-4 performance_0.7.0
[43] foreign_0.8-81 km.ci_0.5-2 survey_4.0 htmlwidgets_1.5.3 httr_1.4.2 getopt_1.20.3
[49] RColorBrewer_1.1-2 ellipsis_0.3.1 reshape_0.8.8 pkgconfig_2.0.3 farver_2.0.3 nnet_7.3-15
[55] dbplyr_2.1.0 reshape2_1.4.4 tidyselect_1.1.0 labeling_0.4.2 rlang_0.4.10 effectsize_0.4.3
[61] munsell_0.5.0 cellranger_1.1.0 cachem_1.0.3 cli_2.3.0 generics_0.1.0 broom_0.7.4
[67] evaluate_0.14 fastmap_1.1.0 yaml_2.2.1 processx_3.4.5 fs_1.5.0 zip_2.1.1
[73] survMisc_0.5.5 visdat_0.5.3 nlme_3.1-152 xml2_1.3.2 compiler_4.0.3 rstudioapi_0.13
[79] png_0.1-7 curl_4.3 e1071_1.7-4 testthat_3.0.1 ggsignif_0.6.0 reprex_1.0.0
[85] statmod_1.4.35 stringi_1.5.3 ps_1.5.0 parameters_0.11.0 desc_1.2.0 Matrix_1.3-2
[91] nloptr_1.2.2.2 KMsurv_0.1-5 vctrs_0.3.6 pillar_1.4.7 lifecycle_0.2.0 estimability_1.3
[97] insight_0.12.0 latticeExtra_0.6-29 R6_2.5.0 gridExtra_2.3 rio_0.5.16 sessioninfo_1.1.1
[103] codetools_0.2-18 boot_1.3-26 assertthat_0.2.1 pkgload_1.1.0 rprojroot_2.0.2 withr_2.4.1
[109] multcomp_1.4-16 mgcv_1.8-33 bayestestR_0.8.2 hms_1.0.0 quadprog_1.5-8 rpart_4.1-15
[115] grid_4.0.3 coda_0.19-4 class_7.3-18 minqa_1.2.4 rmarkdown_2.6 carData_3.0-4
[121] base64enc_0.1-3 lubridate_1.7.9.2 tinytex_0.29
save.image(paste0(PROJECT_loc, "/",Today,".",PROJECTNAME,".main_analyses.RData"))| © 1979-2021 Sander W. van der Laan | s.w.vanderlaan-2[at]gmail.com | swvanderlaan.github.io. |